Commit 61793e67 authored by André Araujo's avatar André Araujo Committed by Lukasz Kaiser

Recent DELF updates, including initial Detect-to-Retrieve code (#6599)

* Internal change.

PiperOrigin-RevId: 185564155

* Fix small bug when reading DELF features when file is empty.
A new test is added that catches this bug.

PiperOrigin-RevId: 213839503

* Refactors DELF example code to expose a function to create a DELF feature extractor.

PiperOrigin-RevId: 241492615

* Merged commit includes the following changes:
244073180  by Andre Araujo:

    Internal change

--
243646498  by Andre Araujo:

    Detect and save bounding boxes for a list of images.
    A new proto is added, along with auxiliary read/write functions, and tests.

--

PiperOrigin-RevId: 244073180
parent f519c015
// Protocol buffer for serializing detected bounding boxes.

syntax = "proto2";

package delf.protos;

message Box {
  // Coordinates: [ymin, xmin, ymax, xmax] corresponds to
  // [top, left, bottom, right].
  optional float ymin = 1;
  optional float xmin = 2;
  optional float ymax = 3;
  optional float xmax = 4;
  // Detection score. Usually, the higher the score, the more confident the
  // detection.
  optional float score = 5;

  // Indicates which class the box corresponds to.
  optional int32 class_index = 6;
}

message Boxes {
  repeated Box box = 1;
}
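A minimal sketch of filling in this proto from Python through the generated module (the import path and all values are illustrative; the box_io module below wraps this in helper functions):

# Sketch only: build a Boxes proto directly via the generated box_pb2 module.
from delf import box_pb2  # import path assumed, as used by box_io below

boxes_proto = box_pb2.Boxes()
boxes_proto.box.add(
    ymin=10.0, xmin=20.0, ymax=110.0, xmax=220.0,  # illustrative coordinates
    score=0.9, class_index=3)
serialized = boxes_proto.SerializeToString()  # bytes, as written to disk by box_io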
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Python interface for Boxes proto.
Support read and write of Boxes from/to numpy arrays and file.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from google3.third_party.tensorflow_models.delf.protos import box_pb2
def ArraysToBoxes(boxes, scores, class_indices):
  """Converts `boxes` to Boxes proto.

  Args:
    boxes: [N, 4] float array denoting bounding box coordinates, in format
      [top, left, bottom, right].
    scores: [N] float array with detection scores.
    class_indices: [N] int array with class indices.

  Returns:
    boxes_proto: Boxes object.
  """
  num_boxes = len(scores)
  assert num_boxes == boxes.shape[0]
  assert num_boxes == len(class_indices)

  boxes_proto = box_pb2.Boxes()
  for i in range(num_boxes):
    boxes_proto.box.add(
        ymin=boxes[i, 0],
        xmin=boxes[i, 1],
        ymax=boxes[i, 2],
        xmax=boxes[i, 3],
        score=scores[i],
        class_index=class_indices[i])

  return boxes_proto
def BoxesToArrays(boxes_proto):
  """Converts data saved in Boxes proto to numpy arrays.

  If there are no boxes, the function returns three empty arrays.

  Args:
    boxes_proto: Boxes proto object.

  Returns:
    boxes: [N, 4] float array denoting bounding box coordinates, in format
      [top, left, bottom, right].
    scores: [N] float array with detection scores.
    class_indices: [N] int array with class indices.
  """
  num_boxes = len(boxes_proto.box)
  if num_boxes == 0:
    return np.array([]), np.array([]), np.array([])

  boxes = np.zeros([num_boxes, 4])
  scores = np.zeros([num_boxes])
  class_indices = np.zeros([num_boxes])

  for i in range(num_boxes):
    box_proto = boxes_proto.box[i]
    boxes[i] = [box_proto.ymin, box_proto.xmin, box_proto.ymax, box_proto.xmax]
    scores[i] = box_proto.score
    class_indices[i] = box_proto.class_index

  return boxes, scores, class_indices
def SerializeToString(boxes, scores, class_indices):
  """Converts numpy arrays to serialized Boxes.

  Args:
    boxes: [N, 4] float array denoting bounding box coordinates, in format
      [top, left, bottom, right].
    scores: [N] float array with detection scores.
    class_indices: [N] int array with class indices.

  Returns:
    Serialized Boxes string.
  """
  boxes_proto = ArraysToBoxes(boxes, scores, class_indices)
  return boxes_proto.SerializeToString()
def ParseFromString(string):
  """Converts serialized Boxes proto string to numpy arrays.

  Args:
    string: Serialized Boxes string.

  Returns:
    boxes: [N, 4] float array denoting bounding box coordinates, in format
      [top, left, bottom, right].
    scores: [N] float array with detection scores.
    class_indices: [N] int array with class indices.
  """
  boxes_proto = box_pb2.Boxes()
  boxes_proto.ParseFromString(string)
  return BoxesToArrays(boxes_proto)
def ReadFromFile(file_path):
  """Helper function to load data from a Boxes proto format in a file.

  Args:
    file_path: Path to file containing data.

  Returns:
    boxes: [N, 4] float array denoting bounding box coordinates, in format
      [top, left, bottom, right].
    scores: [N] float array with detection scores.
    class_indices: [N] int array with class indices.
  """
  with tf.gfile.GFile(file_path, 'rb') as f:
    return ParseFromString(f.read())
def WriteToFile(file_path, boxes, scores, class_indices):
  """Helper function to write data to a file in Boxes proto format.

  Args:
    file_path: Path to file that will be written.
    boxes: [N, 4] float array denoting bounding box coordinates, in format
      [top, left, bottom, right].
    scores: [N] float array with detection scores.
    class_indices: [N] int array with class indices.
  """
  serialized_data = SerializeToString(boxes, scores, class_indices)
  with tf.gfile.GFile(file_path, 'w') as f:
    f.write(serialized_data)
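A minimal round-trip sketch of the helpers above (assumes the delf package is installed; the file path and array values are illustrative):

# Sketch only: write detections with box_io and read them back.
import numpy as np
from delf import box_io

boxes = np.array([[10., 20., 110., 220.], [5., 5., 50., 60.]])  # [ymin, xmin, ymax, xmax]
scores = np.array([0.9, 0.4])
class_indices = np.array([1, 7])

box_io.WriteToFile('/tmp/example.boxes', boxes, scores, class_indices)
boxes_read, scores_read, class_indices_read = box_io.ReadFromFile('/tmp/example.boxes')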
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for box_io, the python interface of Boxes proto."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
from delf import box_io
class BoxesIoTest(tf.test.TestCase):

  def _create_data(self):
    """Creates data to be used in tests.

    Returns:
      boxes: [N, 4] float array denoting bounding box coordinates, in format
        [top, left, bottom, right].
      scores: [N] float array with detection scores.
      class_indices: [N] int array with class indices.
    """
    boxes = np.arange(24, dtype=np.float32).reshape(6, 4)
    scores = np.arange(6, dtype=np.float32)
    class_indices = np.arange(6, dtype=np.int32)
    return boxes, scores, class_indices

  def testConversionAndBack(self):
    boxes, scores, class_indices = self._create_data()

    serialized = box_io.SerializeToString(boxes, scores, class_indices)
    parsed_data = box_io.ParseFromString(serialized)

    self.assertAllEqual(boxes, parsed_data[0])
    self.assertAllEqual(scores, parsed_data[1])
    self.assertAllEqual(class_indices, parsed_data[2])

  def testWriteAndReadToFile(self):
    boxes, scores, class_indices = self._create_data()

    tmpdir = tf.test.get_temp_dir()
    filename = os.path.join(tmpdir, 'test.boxes')
    box_io.WriteToFile(filename, boxes, scores, class_indices)

    data_read = box_io.ReadFromFile(filename)
    self.assertAllEqual(boxes, data_read[0])
    self.assertAllEqual(scores, data_read[1])
    self.assertAllEqual(class_indices, data_read[2])

  def testWriteAndReadToFileEmptyFile(self):
    tmpdir = tf.test.get_temp_dir()
    filename = os.path.join(tmpdir, 'test.box')
    box_io.WriteToFile(filename, np.array([]), np.array([]), np.array([]))

    data_read = box_io.ReadFromFile(filename)
    self.assertAllEqual(np.array([]), data_read[0])
    self.assertAllEqual(np.array([]), data_read[1])
    self.assertAllEqual(np.array([]), data_read[2])


if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Extracts bounding boxes from a list of images, saving them to files.
The images must be in JPG format. The program checks if boxes already
exist, and skips computation for those.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
import time
import tensorflow as tf
from tensorflow.python.platform import app
from delf import box_io
cmd_args = None

# Extension of box files.
_BOX_EXT = '.boxes'

# Report extraction progress every this many images.
_STATUS_CHECK_ITERATIONS = 100
def _ReadImageList(list_path):
  """Helper function to read image paths.

  Args:
    list_path: Path to list of images, one image path per line.

  Returns:
    image_paths: List of image paths.
  """
  with tf.gfile.GFile(list_path, 'r') as f:
    image_paths = f.readlines()
  image_paths = [entry.rstrip() for entry in image_paths]
  return image_paths
def _MakeDetector(sess, model_dir):
  """Creates a function to detect objects in an image.

  Args:
    sess: TensorFlow session to use.
    model_dir: Directory where SavedModel is located.

  Returns:
    Function that receives an image and returns detection results.
  """
  tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING],
                             model_dir)
  input_images = sess.graph.get_tensor_by_name('input_images:0')
  input_detection_thresh = sess.graph.get_tensor_by_name(
      'input_detection_thresh:0')
  boxes = sess.graph.get_tensor_by_name('detection_boxes:0')
  scores = sess.graph.get_tensor_by_name('detection_scores:0')
  class_indices = sess.graph.get_tensor_by_name('detection_classes:0')

  def DetectorFn(images, threshold):
    """Receives an image and returns detected boxes.

    Args:
      images: Uint8 array with shape (batch, height, width, 3) containing a
        batch of RGB images.
      threshold: Detector threshold (float).

    Returns:
      Tuple (boxes, scores, class_indices).
    """
    return sess.run([boxes, scores, class_indices],
                    feed_dict={
                        input_images: images,
                        input_detection_thresh: threshold,
                    })

  return DetectorFn
def main(argv):
  if len(argv) > 1:
    raise RuntimeError('Too many command-line arguments.')

  tf.logging.set_verbosity(tf.logging.INFO)

  # Read list of images.
  tf.logging.info('Reading list of images...')
  image_paths = _ReadImageList(cmd_args.list_images_path)
  num_images = len(image_paths)
  tf.logging.info('done! Found %d images', num_images)

  # Create output directory if necessary.
  if not os.path.exists(cmd_args.output_dir):
    os.makedirs(cmd_args.output_dir)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Reading list of images.
    filename_queue = tf.train.string_input_producer(image_paths, shuffle=False)
    reader = tf.WholeFileReader()
    _, value = reader.read(filename_queue)
    image_tf = tf.image.decode_jpeg(value, channels=3)
    image_tf = tf.expand_dims(image_tf, 0)

    with tf.Session() as sess:
      init_op = tf.global_variables_initializer()
      sess.run(init_op)

      detector_fn = _MakeDetector(sess, cmd_args.detector_path)

      # Start input enqueue threads.
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      start = time.clock()
      for i, image_path in enumerate(image_paths):
        # Log progress once in a while.
        if i == 0:
          tf.logging.info('Starting to detect objects in images...')
        elif i % _STATUS_CHECK_ITERATIONS == 0:
          elapsed = (time.clock() - start)
          tf.logging.info(
              'Processing image %d out of %d, last %d '
              'images took %f seconds', i, num_images, _STATUS_CHECK_ITERATIONS,
              elapsed)
          start = time.clock()

        # Get next image.
        im = sess.run(image_tf)

        # If boxes already exist for this image, skip their computation.
        base_boxes_filename, _ = os.path.splitext(os.path.basename(image_path))
        out_boxes_filename = base_boxes_filename + _BOX_EXT
        out_boxes_fullpath = os.path.join(cmd_args.output_dir,
                                          out_boxes_filename)
        if tf.gfile.Exists(out_boxes_fullpath):
          tf.logging.info('Skipping %s', image_path)
          continue

        # Detect boxes and save them to file.
        (boxes_out, scores_out,
         class_indices_out) = detector_fn(im, cmd_args.detector_thresh)
        box_io.WriteToFile(out_boxes_fullpath, boxes_out[0], scores_out[0],
                           class_indices_out[0])

      # Finalize enqueue threads.
      coord.request_stop()
      coord.join(threads)
if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.register('type', 'bool', lambda v: v.lower() == 'true')
  parser.add_argument(
      '--detector_path',
      type=str,
      default='/tmp/d2r_frcnn_20190411/',
      help="""
      Path to exported detector model.
      """)
  parser.add_argument(
      '--detector_thresh',
      type=float,
      default=.0,
      help="""
      Detector threshold. Any box with confidence score lower than this is not
      returned.
      """)
  parser.add_argument(
      '--list_images_path',
      type=str,
      default='list_images.txt',
      help="""
      Path to list of images to undergo object detection.
      """)
  parser.add_argument(
      '--output_dir',
      type=str,
      default='test_boxes',
      help="""
      Directory where bounding boxes will be written to. Each image's boxes
      will be written to a file with the same name, and extension replaced by
      .boxes.
      """)
  cmd_args, unparsed = parser.parse_known_args()
  app.run(main=main, argv=[sys.argv[0]] + unparsed)
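Once the script has run, the saved boxes can be loaded back with box_io, for example to keep only confident detections for later cropping. A minimal sketch (the directory matches the --output_dir default above; the 0.5 threshold is illustrative):

# Sketch only: read back the .boxes files written by this script.
import os
import numpy as np
from delf import box_io

output_dir = 'test_boxes'  # matches the --output_dir default above
for filename in sorted(os.listdir(output_dir)):
  if not filename.endswith('.boxes'):
    continue
  boxes, scores, class_indices = box_io.ReadFromFile(
      os.path.join(output_dir, filename))
  # Keep only confident detections (illustrative threshold).
  confident = boxes[scores > 0.5] if boxes.size else boxes
  print(filename, 'detections:', len(boxes), 'confident:', len(confident))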
@@ -59,6 +59,54 @@ def _ReadImageList(list_path):
return image_paths
def MakeExtractor(sess, config):
  """Creates a function to extract features from an image.

  Args:
    sess: TensorFlow session to use.
    config: DelfConfig proto containing the model configuration.

  Returns:
    Function that receives an image and returns features.
  """
  tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING],
                             config.model_path)

  input_image = sess.graph.get_tensor_by_name('input_image:0')
  input_score_threshold = sess.graph.get_tensor_by_name('input_abs_thres:0')
  input_image_scales = sess.graph.get_tensor_by_name('input_scales:0')
  input_max_feature_num = sess.graph.get_tensor_by_name(
      'input_max_feature_num:0')
  boxes = sess.graph.get_tensor_by_name('boxes:0')
  raw_descriptors = sess.graph.get_tensor_by_name('features:0')
  feature_scales = sess.graph.get_tensor_by_name('scales:0')
  attention_with_extra_dim = sess.graph.get_tensor_by_name('scores:0')
  attention = tf.reshape(attention_with_extra_dim,
                         [tf.shape(attention_with_extra_dim)[0]])

  locations, descriptors = feature_extractor.DelfFeaturePostProcessing(
      boxes, raw_descriptors, config)

  def ExtractorFn(image):
    """Receives an image and returns DELF features.

    Args:
      image: Uint8 array with shape (height, width, 3) containing the RGB
        image.

    Returns:
      Tuple (locations, descriptors, feature_scales, attention).
    """
    return sess.run(
        [locations, descriptors, feature_scales, attention],
        feed_dict={
            input_image: image,
            input_score_threshold: config.delf_local_config.score_threshold,
            input_image_scales: list(config.image_scales),
            input_max_feature_num: config.delf_local_config.max_feature_num
        })

  return ExtractorFn
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
@@ -86,28 +134,10 @@ def main(unused_argv):
    image_tf = tf.image.decode_jpeg(value, channels=3)

    with tf.Session() as sess:
      # Initialize variables.
      init_op = tf.global_variables_initializer()
      sess.run(init_op)

      # Loading model that will be used.
      tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING],
                                 config.model_path)

      graph = tf.get_default_graph()
      input_image = graph.get_tensor_by_name('input_image:0')
      input_score_threshold = graph.get_tensor_by_name('input_abs_thres:0')
      input_image_scales = graph.get_tensor_by_name('input_scales:0')
      input_max_feature_num = graph.get_tensor_by_name(
          'input_max_feature_num:0')
      boxes = graph.get_tensor_by_name('boxes:0')
      raw_descriptors = graph.get_tensor_by_name('features:0')
      feature_scales = graph.get_tensor_by_name('scales:0')
      attention_with_extra_dim = graph.get_tensor_by_name('scores:0')
      attention = tf.reshape(attention_with_extra_dim,
                             [tf.shape(attention_with_extra_dim)[0]])

      locations, descriptors = feature_extractor.DelfFeaturePostProcessing(
          boxes, raw_descriptors, config)

      extractor_fn = MakeExtractor(sess, config)

      # Start input enqueue threads.
      coord = tf.train.Coordinator()
@@ -119,9 +149,10 @@
        tf.logging.info('Starting to extract DELF features from images...')
      elif i % _STATUS_CHECK_ITERATIONS == 0:
        elapsed = (time.clock() - start)
        tf.logging.info('Processing image %d out of %d, last %d '
                        'images took %f seconds', i, num_images,
                        _STATUS_CHECK_ITERATIONS, elapsed)
        tf.logging.info(
            'Processing image %d out of %d, last %d '
            'images took %f seconds', i, num_images, _STATUS_CHECK_ITERATIONS,
            elapsed)
        start = time.clock()

        # Get next image.
@@ -137,18 +168,7 @@
        # Extract and save features.
        (locations_out, descriptors_out, feature_scales_out,
         attention_out) = sess.run(
             [locations, descriptors, feature_scales, attention],
             feed_dict={
                 input_image:
                     im,
                 input_score_threshold:
                     config.delf_local_config.score_threshold,
                 input_image_scales:
                     list(config.image_scales),
                 input_max_feature_num:
                     config.delf_local_config.max_feature_num
             })
         attention_out) = extractor_fn(im)

        feature_io.WriteToFile(out_desc_fullpath, locations_out,
                               feature_scales_out, descriptors_out,
......
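The point of the refactor above is that MakeExtractor can now be reused outside the example script. A minimal standalone sketch, assuming the delf package is installed, the example script is importable from the Python path, and 'delf_config_example.pbtxt' names a valid config whose model_path points at an exported DELF model:

# Sketch only: reuse MakeExtractor outside the example pipeline.
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from delf import delf_config_pb2
from extract_features import MakeExtractor  # assumes the example script is on PYTHONPATH

config = delf_config_pb2.DelfConfig()
with tf.gfile.GFile('delf_config_example.pbtxt', 'r') as f:  # illustrative config path
  text_format.Merge(f.read(), config)

with tf.Graph().as_default():
  with tf.Session() as sess:
    extractor_fn = MakeExtractor(sess, config)
    image = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder RGB image
    locations, descriptors, feature_scales, attention = extractor_fn(image)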
@@ -22,7 +22,6 @@ from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange
import tensorflow as tf
from delf import feature_pb2
@@ -37,8 +36,8 @@ def ArraysToDelfFeatures(locations,
"""Converts DELF features to DelfFeatures proto.
Args:
locations: [N, 2] float array which denotes the selected keypoint
locations. N is the number of features.
locations: [N, 2] float array which denotes the selected keypoint locations.
N is the number of features.
scales: [N] float array with feature scales.
descriptors: [N, depth] float array with DELF descriptors.
attention: [N] float array with attention scores.
@@ -89,7 +88,7 @@ def DelfFeaturesToArrays(delf_features):
"""
num_features = len(delf_features.feature)
if num_features == 0:
return np.array([]), np.array([]), np.array([]), np.array([])
return np.array([]), np.array([]), np.array([]), np.array([]), np.array([])
# Figure out descriptor dimensionality by parsing first one.
descriptor_dim = len(
@@ -120,8 +119,8 @@ def SerializeToString(locations,
"""Converts numpy arrays to serialized DelfFeatures.
Args:
locations: [N, 2] float array which denotes the selected keypoint
locations. N is the number of features.
locations: [N, 2] float array which denotes the selected keypoint locations.
N is the number of features.
scales: [N] float array with feature scales.
descriptors: [N, depth] float array with DELF descriptors.
attention: [N] float array with attention scores.
@@ -183,8 +182,8 @@ def WriteToFile(file_path,
  Args:
    file_path: Path to file that will be written.
    locations: [N, 2] float array which denotes the selected keypoint
      locations. N is the number of features.
    locations: [N, 2] float array which denotes the selected keypoint locations.
      N is the number of features.
    scales: [N] float array with feature scales.
    descriptors: [N, depth] float array with DELF descriptors.
    attention: [N] float array with attention scores.
......
@@ -93,6 +93,19 @@ class DelfFeaturesIoTest(tf.test.TestCase):
    self.assertAllEqual(attention, data_read[3])
    self.assertAllEqual(orientations, data_read[4])

  def testWriteAndReadToFileEmptyFile(self):
    tmpdir = tf.test.get_temp_dir()
    filename = os.path.join(tmpdir, 'test.delf')
    feature_io.WriteToFile(filename, np.array([]), np.array([]), np.array([]),
                           np.array([]), np.array([]))

    data_read = feature_io.ReadFromFile(filename)
    self.assertAllEqual(np.array([]), data_read[0])
    self.assertAllEqual(np.array([]), data_read[1])
    self.assertAllEqual(np.array([]), data_read[2])
    self.assertAllEqual(np.array([]), data_read[3])
    self.assertAllEqual(np.array([]), data_read[4])


if __name__ == '__main__':
  tf.test.main()