Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

parents d8611151 b035a227
@@ -19,10 +19,6 @@ from __future__ import print_function
import tensorflow as tf
-from delf import delf_v1
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
def NormalizePixelValues(image,
                         pixel_value_offset=128.0,
@@ -81,219 +77,6 @@ def CalculateKeypointCenters(boxes):
                  2.0)
def ExtractKeypointDescriptor(image, layer_name, image_scales, iou,
max_feature_num, abs_thres, model_fn):
"""Extract keypoint descriptor for input image.
Args:
image: A image tensor with shape [h, w, channels].
layer_name: The endpoint of feature extraction layer.
image_scales: A 1D float tensor which contains the scales.
iou: A float scalar denoting the IOU threshold for NMS.
max_feature_num: An int tensor denoting the maximum selected feature points.
abs_thres: A float tensor denoting the score threshold for feature
selection.
model_fn: Model function. Follows the signature:
* Args:
* `images`: Image tensor which is re-scaled.
* `normalized_image`: Whether or not the images are normalized.
* `reuse`: Whether or not the layer and its variables should be reused.
* Returns:
* `attention`: Attention score after the non-linearity.
* `feature_map`: Feature map obtained from the ResNet model.
Returns:
boxes: [N, 4] float tensor which denotes the selected receptive box. N is
the number of final feature points which pass through keypoint selection
and NMS steps.
feature_scales: [N] float tensor. It is the inverse of the input image
scales such that larger image scales correspond to larger image regions,
which is compatible with scale-space keypoint detection convention.
features: [N, depth] float tensor with feature descriptors.
scores: [N, 1] float tensor denoting the attention score.
Raises:
ValueError: If the layer_name is unsupported.
"""
original_image_shape_float = tf.gather(
tf.cast(tf.shape(image), dtype=tf.float32), [0, 1])
image_tensor = NormalizePixelValues(image)
image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')
# Feature depth and receptive field parameters for each network version.
if layer_name == 'resnet_v1_50/block3':
feature_depth = 1024
rf, stride, padding = [291.0, 32.0, 145.0]
elif layer_name == 'resnet_v1_50/block4':
feature_depth = 2048
rf, stride, padding = [483.0, 32.0, 241.0]
else:
raise ValueError('Unsupported layer_name.')
def _ProcessSingleScale(scale_index,
boxes,
features,
scales,
scores,
reuse=True):
"""Resize the image and run feature extraction and keypoint selection.
This function will be passed into tf.while_loop() and be called
repeatedly. The input boxes are collected from the previous iteration
[0: scale_index -1]. We get the current scale by
image_scales[scale_index], and run image resizing, feature extraction and
keypoint selection. Then we will get a new set of selected_boxes for
current scale. In the end, we concat the previous boxes with current
selected_boxes as the output.
Args:
scale_index: A valid index in the image_scales.
boxes: Box tensor with the shape of [N, 4].
features: Feature tensor with the shape of [N, depth].
scales: Scale tensor with the shape of [N].
scores: Attention score tensor with the shape of [N].
reuse: Whether or not the layer and its variables should be reused.
Returns:
scale_index: The next scale index for processing.
boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
features: Concatenated feature tensor with the shape of [K, depth].
scales: Concatenated scale tensor with the shape of [K].
scores: Concatenated attention score tensor with the shape of [K].
"""
scale = tf.gather(image_scales, scale_index)
new_image_size = tf.cast(
tf.round(original_image_shape_float * scale), dtype=tf.int32)
resized_image = tf.compat.v1.image.resize_bilinear(image_tensor,
new_image_size)
attention, feature_map = model_fn(
resized_image, normalized_image=True, reuse=reuse)
rf_boxes = CalculateReceptiveBoxes(
tf.shape(feature_map)[1],
tf.shape(feature_map)[2], rf, stride, padding)
# Re-project back to the original image space.
rf_boxes = tf.divide(rf_boxes, scale)
attention = tf.reshape(attention, [-1])
feature_map = tf.reshape(feature_map, [-1, feature_depth])
# Use attention score to select feature vectors.
indices = tf.reshape(tf.where(attention >= abs_thres), [-1])
selected_boxes = tf.gather(rf_boxes, indices)
selected_features = tf.gather(feature_map, indices)
selected_scores = tf.gather(attention, indices)
selected_scales = tf.ones_like(selected_scores, tf.float32) / scale
# Concat with the previous result from different scales.
boxes = tf.concat([boxes, selected_boxes], 0)
features = tf.concat([features, selected_features], 0)
scales = tf.concat([scales, selected_scales], 0)
scores = tf.concat([scores, selected_scores], 0)
return scale_index + 1, boxes, features, scales, scores
output_boxes = tf.zeros([0, 4], dtype=tf.float32)
output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
output_scales = tf.zeros([0], dtype=tf.float32)
output_scores = tf.zeros([0], dtype=tf.float32)
# Process the first scale separately, the following scales will reuse the
# graph variables.
(_, output_boxes, output_features, output_scales,
output_scores) = _ProcessSingleScale(
0,
output_boxes,
output_features,
output_scales,
output_scores,
reuse=False)
i = tf.constant(1, dtype=tf.int32)
num_scales = tf.shape(image_scales)[0]
keep_going = lambda j, boxes, features, scales, scores: tf.less(j, num_scales)
(_, output_boxes, output_features, output_scales,
output_scores) = tf.while_loop(
cond=keep_going,
body=_ProcessSingleScale,
loop_vars=[
i, output_boxes, output_features, output_scales, output_scores
],
shape_invariants=[
i.get_shape(),
tf.TensorShape([None, 4]),
tf.TensorShape([None, feature_depth]),
tf.TensorShape([None]),
tf.TensorShape([None])
],
back_prop=False)
feature_boxes = box_list.BoxList(output_boxes)
feature_boxes.add_field('features', output_features)
feature_boxes.add_field('scales', output_scales)
feature_boxes.add_field('scores', output_scores)
nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
nms_max_boxes)
return (final_boxes.get(), final_boxes.get_field('scales'),
final_boxes.get_field('features'),
tf.expand_dims(final_boxes.get_field('scores'), 1))
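As an aside, the `[N, 4]` receptive-field boxes returned above are normally reduced to keypoint locations with `CalculateKeypointCenters` from this same file. A minimal, illustrative sketch of that conversion (the helper name here is not part of the library):

```python
# Illustrative sketch: turn [ymin, xmin, ymax, xmax] receptive-field boxes
# into [N, 2] keypoint centers, mirroring CalculateKeypointCenters above.
import tensorflow as tf

def boxes_to_keypoints(boxes):
  """Returns the (y, x) center of each box."""
  ymin, xmin, ymax, xmax = tf.unstack(tf.cast(boxes, tf.float32), axis=1)
  return tf.stack([(ymin + ymax) / 2.0, (xmin + xmax) / 2.0], axis=1)
```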
def BuildModel(layer_name, attention_nonlinear, attention_type,
attention_kernel_size):
"""Build the DELF model.
This function is helpful for constructing the model function which will be fed
to ExtractKeypointDescriptor().
Args:
layer_name: the endpoint of feature extraction layer.
attention_nonlinear: Type of the non-linearity for the attention function.
Currently, only 'softplus' is supported.
attention_type: Type of the attention used. Options are:
'use_l2_normalized_feature' and 'use_default_input_feature'. Note that
this is irrelevant during inference time.
attention_kernel_size: Size of attention kernel (kernel is square).
Returns:
Attention model function.
"""
def _ModelFn(images, normalized_image, reuse):
"""Attention model to get feature map and attention score map.
Args:
images: Image tensor.
normalized_image: Whether or not the images are normalized.
reuse: Whether or not the layer and its variables should be reused.
Returns:
attention: Attention score after the non-linearity.
feature_map: Feature map after ResNet convolution.
"""
if normalized_image:
image_tensor = images
else:
image_tensor = NormalizePixelValues(images)
# Extract features and attention scores.
model = delf_v1.DelfV1(layer_name)
_, attention, _, feature_map, _ = model.GetAttentionPrelogit(
image_tensor,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=[attention_kernel_size, attention_kernel_size],
training_resnet=False,
training_attention=False,
reuse=reuse)
return attention, feature_map
return _ModelFn
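For context, a hedged sketch of how the two removed functions were wired together in the old TF1 graph-mode pipeline; the scale list, thresholds, and feature cap below are illustrative values, and running the graph additionally requires restoring DELF variables from a checkpoint:

```python
# Illustrative TF1 wiring of BuildModel + ExtractKeypointDescriptor (both
# removed in this commit). Parameter values are placeholders, not defaults.
import tensorflow as tf

image = tf.compat.v1.placeholder(tf.float32, shape=(None, None, 3))
model_fn = BuildModel(
    layer_name='resnet_v1_50/block3',
    attention_nonlinear='softplus',
    attention_type='use_l2_normalized_feature',
    attention_kernel_size=1)
boxes, scales, features, scores = ExtractKeypointDescriptor(
    image,
    layer_name='resnet_v1_50/block3',
    image_scales=tf.constant([0.7071, 1.0, 1.4142]),
    iou=1.0,
    max_feature_num=1000,
    abs_thres=1.5,
    model_fn=model_fn)
```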
def ApplyPcaAndWhitening(data,
                         pca_matrix,
                         pca_mean,
@@ -345,22 +128,21 @@ def PostProcessDescriptors(descriptors, use_pca, pca_parameters=None):
    normalization and (possibly) PCA/whitening.
  """
  # L2-normalize, and if desired apply PCA (followed by L2-normalization).
-  with tf.compat.v1.variable_scope('postprocess'):
-    final_descriptors = tf.nn.l2_normalize(
-        descriptors, axis=1, name='l2_normalization')
-    if use_pca:
-      # Apply PCA, and whitening if desired.
-      final_descriptors = ApplyPcaAndWhitening(final_descriptors,
-                                               pca_parameters['matrix'],
-                                               pca_parameters['mean'],
-                                               pca_parameters['dim'],
-                                               pca_parameters['use_whitening'],
-                                               pca_parameters['variances'])
-      # Re-normalize.
-      final_descriptors = tf.nn.l2_normalize(
-          final_descriptors, axis=1, name='pca_l2_normalization')
+  final_descriptors = tf.nn.l2_normalize(
+      descriptors, axis=1, name='l2_normalization')
+  if use_pca:
+    # Apply PCA, and whitening if desired.
+    final_descriptors = ApplyPcaAndWhitening(final_descriptors,
+                                             pca_parameters['matrix'],
+                                             pca_parameters['mean'],
+                                             pca_parameters['dim'],
+                                             pca_parameters['use_whitening'],
+                                             pca_parameters['variances'])
+    # Re-normalize.
+    final_descriptors = tf.nn.l2_normalize(
+        final_descriptors, axis=1, name='pca_l2_normalization')
  return final_descriptors
...
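The keys read from `pca_parameters` above imply a dictionary of the following shape. This is only a sketch: the 40-to-16 dimensionalities are invented, and the array layouts (matrix `[dim_out, dim_in]`, mean `[1, dim_in]`, variances `[1, dim_out]`) are inferred from the PCA test in this diff, so verify them against `ApplyPcaAndWhitening` before relying on them:

```python
# Hypothetical pca_parameters dict for PostProcessDescriptors; only the key
# names are taken from the code above, the shapes and sizes are assumptions.
import numpy as np

pca_parameters = {
    'matrix': np.random.rand(16, 40).astype(np.float32),   # PCA projection.
    'mean': np.random.rand(1, 40).astype(np.float32),      # Per-dimension mean.
    'dim': 16,                                              # Output dimensions.
    'use_whitening': True,                                  # Apply whitening.
    'variances': np.random.rand(1, 16).astype(np.float32),  # For whitening.
}
```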
@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import numpy as np
import tensorflow as tf
from delf import feature_extractor
@@ -34,78 +33,24 @@ class FeatureExtractorTest(tf.test.TestCase):
        image, pixel_value_offset=5.0, pixel_value_scale=2.0)
    exp_normalized_image = [[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
                            [[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]]
-    with self.session() as sess:
-      normalized_image_out = sess.run(normalized_image)
-    self.assertAllEqual(normalized_image_out, exp_normalized_image)
+    self.assertAllEqual(normalized_image, exp_normalized_image)
  def testCalculateReceptiveBoxes(self):
    boxes = feature_extractor.CalculateReceptiveBoxes(
        height=1, width=2, rf=291, stride=32, padding=145)
    exp_boxes = [[-145., -145., 145., 145.], [-145., -113., 145., 177.]]
-    with self.session() as sess:
-      boxes_out = sess.run(boxes)
-    self.assertAllEqual(exp_boxes, boxes_out)
+    self.assertAllEqual(exp_boxes, boxes)
  def testCalculateKeypointCenters(self):
    boxes = [[-10.0, 0.0, 11.0, 21.0], [-2.5, 5.0, 18.5, 26.0],
             [45.0, -2.5, 66.0, 18.5]]
    centers = feature_extractor.CalculateKeypointCenters(boxes)
-    with self.session() as sess:
-      centers_out = sess.run(centers)
    exp_centers = [[0.5, 10.5], [8.0, 15.5], [55.5, 8.0]]
-    self.assertAllEqual(exp_centers, centers_out)
+    self.assertAllEqual(exp_centers, centers)
def testExtractKeypointDescriptor(self):
image = tf.constant(
[[[0, 255, 255], [128, 64, 196]], [[0, 0, 32], [32, 128, 16]]],
dtype=tf.uint8)
# Arbitrary model function used to test ExtractKeypointDescriptor. The
# generated feature_map is a replicated version of the image, concatenated
# with zeros to achieve the required dimensionality. The attention is simply
# the norm of the input image pixels.
def _test_model_fn(image, normalized_image, reuse):
del normalized_image, reuse # Unused variables in the test.
image_shape = tf.shape(image)
attention = tf.squeeze(tf.norm(image, axis=3))
feature_map = tf.concat([
tf.tile(image, [1, 1, 1, 341]),
tf.zeros([1, image_shape[1], image_shape[2], 1])
],
axis=3)
return attention, feature_map
boxes, feature_scales, features, scores = (
feature_extractor.ExtractKeypointDescriptor(
image,
layer_name='resnet_v1_50/block3',
image_scales=tf.constant([1.0]),
iou=1.0,
max_feature_num=10,
abs_thres=1.5,
model_fn=_test_model_fn))
exp_boxes = [[-145.0, -145.0, 145.0, 145.0], [-113.0, -145.0, 177.0, 145.0]]
exp_feature_scales = [1.0, 1.0]
exp_features = np.array(
np.concatenate(
(np.tile([[-1.0, 127.0 / 128.0, 127.0 / 128.0], [-1.0, -1.0, -0.75]
], [1, 341]), np.zeros([2, 1])),
axis=1))
exp_scores = [[1.723042], [1.600781]]
with self.session() as sess:
boxes_out, feature_scales_out, features_out, scores_out = sess.run(
[boxes, feature_scales, features, scores])
self.assertAllEqual(exp_boxes, boxes_out)
self.assertAllEqual(exp_feature_scales, feature_scales_out)
self.assertAllClose(exp_features, features_out)
self.assertAllClose(exp_scores, scores_out)
  def testPcaWhitening(self):
    data = tf.constant([[1.0, 2.0, -2.0], [-5.0, 0.0, 3.0], [-1.0, 2.0, 0.0],
@@ -123,12 +68,8 @@ class FeatureExtractorTest(tf.test.TestCase):
    exp_output = [[2.5, -5.0], [-6.0, -2.0], [-0.5, -3.0], [1.0, -2.0]]
-    with self.session() as sess:
-      output_out = sess.run(output)
-    self.assertAllEqual(exp_output, output_out)
+    self.assertAllEqual(exp_output, output)
if __name__ == '__main__':
-  tf.compat.v1.disable_eager_execution()
  tf.test.main()
@@ -20,11 +20,14 @@ from __future__ import print_function
import os
+from absl import flags
import numpy as np
import tensorflow as tf
from delf import feature_io
+FLAGS = flags.FLAGS
def create_data():
  """Creates data to be used in tests.
@@ -81,8 +84,7 @@ class DelfFeaturesIoTest(tf.test.TestCase):
  def testWriteAndReadToFile(self):
    locations, scales, descriptors, attention, orientations = create_data()
-    tmpdir = tf.compat.v1.test.get_temp_dir()
-    filename = os.path.join(tmpdir, 'test.delf')
+    filename = os.path.join(FLAGS.test_tmpdir, 'test.delf')
    feature_io.WriteToFile(filename, locations, scales, descriptors, attention,
                           orientations)
    data_read = feature_io.ReadFromFile(filename)
@@ -94,8 +96,7 @@ class DelfFeaturesIoTest(tf.test.TestCase):
    self.assertAllEqual(orientations, data_read[4])
  def testWriteAndReadToFileEmptyFile(self):
-    tmpdir = tf.compat.v1.test.get_temp_dir()
-    filename = os.path.join(tmpdir, 'test.delf')
+    filename = os.path.join(FLAGS.test_tmpdir, 'test.delf')
    feature_io.WriteToFile(filename, np.array([]), np.array([]), np.array([]),
                           np.array([]), np.array([]))
    data_read = feature_io.ReadFromFile(filename)
...
@@ -302,6 +302,21 @@ def _write_relabeling_rules(relabeling_rules):
    csv_writer.writerow([new_label, old_label])
def _shuffle_by_columns(np_array, random_state):
"""Shuffle the columns of a 2D numpy array.
Args:
np_array: array to shuffle.
random_state: numpy RandomState to be used for shuffling.
Returns:
The shuffled array.
"""
columns = np_array.shape[1]
columns_indices = np.arange(columns)
random_state.shuffle(columns_indices)
return np_array[:, columns_indices]
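A quick illustration of what the new helper does: the rows of the attribute array stay aligned (path, file id, label), and only the column order changes. The values below are invented for the example:

```python
# Tiny demonstration of _shuffle_by_columns on a 2D attribute array.
import numpy as np

attrs = np.array([['a.jpg', 'b.jpg', 'c.jpg'],
                  ['id_a', 'id_b', 'id_c'],
                  ['3', '1', '2']])
rs = np.random.RandomState(seed=0)
shuffled = _shuffle_by_columns(attrs, rs)
# Each column (one image's path, id and label) moves as a unit.
print(shuffled)
```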
def _build_train_and_validation_splits(image_paths, file_ids, labels,
                                       validation_split_size, seed):
  """Create TRAIN and VALIDATION splits containg all labels in equal proportion.
@@ -353,19 +368,21 @@ def _build_train_and_validation_splits(image_paths, file_ids, labels,
  for label, indexes in image_attrs_idx_by_label.items():
    # Create the subset for the current label.
    image_attrs_label = image_attrs[:, indexes]
-    images_per_label = image_attrs_label.shape[1]
    # Shuffle the current label subset.
-    columns_indices = np.arange(images_per_label)
-    rs.shuffle(columns_indices)
-    image_attrs_label = image_attrs_label[:, columns_indices]
+    image_attrs_label = _shuffle_by_columns(image_attrs_label, rs)
    # Split the current label subset into TRAIN and VALIDATION splits and add
    # each split to the list of all splits.
+    images_per_label = image_attrs_label.shape[1]
    cutoff_idx = max(1, int(validation_split_size * images_per_label))
    splits[_VALIDATION_SPLIT].append(image_attrs_label[:, 0 : cutoff_idx])
    splits[_TRAIN_SPLIT].append(image_attrs_label[:, cutoff_idx : ])
-  validation_split = np.concatenate(splits[_VALIDATION_SPLIT], axis=1)
-  train_split = np.concatenate(splits[_TRAIN_SPLIT], axis=1)
+  # Concatenate all subsets of image attributes into TRAIN and VALIDATION splits
+  # and reshuffle them again to ensure variance of labels across batches.
+  validation_split = _shuffle_by_columns(
+      np.concatenate(splits[_VALIDATION_SPLIT], axis=1), rs)
+  train_split = _shuffle_by_columns(
+      np.concatenate(splits[_TRAIN_SPLIT], axis=1), rs)
  # Unstack the image attribute arrays in the TRAIN and VALIDATION splits and
  # convert them back to lists. Convert labels back to 'int' from 'str'
...
@@ -27,6 +27,11 @@ import functools
import tensorflow as tf
+class _GoogleLandmarksInfo(object):
+  """Metadata about the Google Landmarks dataset."""
+  num_classes = {'gld_v1': 14951, 'gld_v2': 203094, 'gld_v2_clean': 81313}
class _DataAugmentationParams(object):
  """Default parameters for augmentation."""
  # The following are used for training.
@@ -114,6 +119,8 @@ def _ParseFunction(example, name_to_features, image_size, augmentation):
  # Parse to get image.
  image = parsed_example['image/encoded']
  image = tf.io.decode_jpeg(image)
+  image = NormalizeImages(
+      image, pixel_value_scale=128.0, pixel_value_offset=128.0)
  if augmentation:
    image = _ImageNetCrop(image)
  else:
@@ -121,6 +128,7 @@ def _ParseFunction(example, name_to_features, image_size, augmentation):
    image.set_shape([image_size, image_size, 3])
  # Parse to get label.
  label = parsed_example['image/class/label']
  return image, label
@@ -153,6 +161,7 @@ def CreateDataset(file_pattern,
      'image/width': tf.io.FixedLenFeature([], tf.int64, default_value=0),
      'image/channels': tf.io.FixedLenFeature([], tf.int64, default_value=0),
      'image/format': tf.io.FixedLenFeature([], tf.string, default_value=''),
+      'image/id': tf.io.FixedLenFeature([], tf.string, default_value=''),
      'image/filename': tf.io.FixedLenFeature([], tf.string, default_value=''),
      'image/encoded': tf.io.FixedLenFeature([], tf.string, default_value=''),
      'image/class/label': tf.io.FixedLenFeature([], tf.int64, default_value=0),
@@ -167,3 +176,12 @@ def CreateDataset(file_pattern,
  dataset = dataset.batch(batch_size)
  return dataset
def GoogleLandmarksInfo():
"""Returns metadata information on the Google Landmarks dataset.
Returns:
object _GoogleLandmarksInfo containing metadata about the GLD dataset.
"""
return _GoogleLandmarksInfo()
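A short sketch of how the new metadata accessor is meant to be used, assuming this module is imported as `gld` the way the updated train.py in this commit does:

```python
# Sketch: look up the class count for the selected dataset version.
dataset_version = 'gld_v2_clean'  # One of 'gld_v1', 'gld_v2', 'gld_v2_clean'.
gld_info = gld.GoogleLandmarksInfo()
num_classes = gld_info.num_classes[dataset_version]
print(num_classes)  # 81313 for the cleaned GLDv2 split, per the table above.
```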
@@ -132,10 +132,12 @@ class Delf(tf.keras.Model):
        self.attn_classification.trainable_weights)
  def call(self, input_image, training=True):
-    blocks = {'block3': None}
-    self.backbone(input_image, intermediates_dict=blocks, training=training)
-    features = blocks['block3']
+    blocks = {}
+    self.backbone.build_call(
+        input_image, intermediates_dict=blocks, training=training)
+    features = blocks['block3']  # pytype: disable=key-error
    _, probs, _ = self.attention(features, training=training)
    return probs, features
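For context, a hedged sketch of invoking the updated `call()`: it returns attention probabilities plus the block3 feature map accumulated through the `intermediates_dict` filled by `backbone.build_call()`. The input size is illustrative:

```python
# Sketch: one forward pass through the Delf Keras model after this change.
import tensorflow as tf
from delf.python.training.model import delf_model

model = delf_model.Delf(block3_strides=False, name='DELF')
images = tf.zeros((1, 321, 321, 3), dtype=tf.float32)  # Illustrative size.
probs, features = model(images, training=False)
# probs: attention scores, features: ResNet block3 feature maps.
```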
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Export global feature tensorflow inference model.
This model includes image pyramids for multi-scale processing.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app
from absl import flags
import tensorflow as tf
from delf.python.training.model import delf_model
from delf.python.training.model import export_model_utils
FLAGS = flags.FLAGS
flags.DEFINE_string('ckpt_path', '/tmp/delf-logdir/delf-weights',
'Path to saved checkpoint.')
flags.DEFINE_string('export_path', None, 'Path where model will be exported.')
flags.DEFINE_list(
'input_scales_list', None,
'Optional input image scales to use. If None (default), an input end-point '
'"input_scales" is added for the exported model. If not None, the '
'specified list of floats will be hard-coded as the desired input scales.')
flags.DEFINE_enum(
'multi_scale_pool_type', 'None', ['None', 'average', 'sum'],
"If 'None' (default), the model is exported with an output end-point "
"'global_descriptors', where the global descriptor for each scale is "
"returned separately. If not 'None', the global descriptor of each scale is"
' pooled and a 1D global descriptor is returned, with output end-point '
"'global_descriptor'.")
flags.DEFINE_boolean('normalize_global_descriptor', False,
'If True, L2-normalizes global descriptor.')
def _build_tensor_info(tensor_dict):
"""Replace the dict's value by the tensor info.
Args:
tensor_dict: A dictionary contains <string, tensor>.
Returns:
dict: New dictionary contains <string, tensor_info>.
"""
return {
k: tf.compat.v1.saved_model.utils.build_tensor_info(t)
for k, t in tensor_dict.items()
}
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
export_path = FLAGS.export_path
if os.path.exists(export_path):
raise ValueError('Export_path already exists.')
with tf.Graph().as_default() as g, tf.compat.v1.Session(graph=g) as sess:
# Setup the model for extraction.
model = delf_model.Delf(block3_strides=False, name='DELF')
# Initial forward pass to build model.
images = tf.zeros((1, 321, 321, 3), dtype=tf.float32)
model(images)
# Setup the multiscale extraction.
input_image = tf.compat.v1.placeholder(
tf.uint8, shape=(None, None, 3), name='input_image')
if FLAGS.input_scales_list is None:
input_scales = tf.compat.v1.placeholder(
tf.float32, shape=[None], name='input_scales')
else:
input_scales = tf.constant([float(s) for s in FLAGS.input_scales_list],
dtype=tf.float32,
shape=[len(FLAGS.input_scales_list)],
name='input_scales')
extracted_features = export_model_utils.ExtractGlobalFeatures(
input_image,
input_scales,
lambda x: model.backbone(x, training=False),
multi_scale_pool_type=FLAGS.multi_scale_pool_type,
normalize_global_descriptor=FLAGS.normalize_global_descriptor)
# Load the weights.
checkpoint_path = FLAGS.ckpt_path
model.load_weights(checkpoint_path)
print('Checkpoint loaded from ', checkpoint_path)
named_input_tensors = {'input_image': input_image}
if FLAGS.input_scales_list is None:
named_input_tensors['input_scales'] = input_scales
# Outputs to the exported model.
named_output_tensors = {}
if FLAGS.multi_scale_pool_type == 'None':
named_output_tensors['global_descriptors'] = tf.identity(
extracted_features, name='global_descriptors')
else:
named_output_tensors['global_descriptor'] = tf.identity(
extracted_features, name='global_descriptor')
# Export the model.
signature_def = (
tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
inputs=_build_tensor_info(named_input_tensors),
outputs=_build_tensor_info(named_output_tensors)))
print('Exporting trained model to:', export_path)
builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)
init_op = None
builder.add_meta_graph_and_variables(
sess, [tf.compat.v1.saved_model.tag_constants.SERVING],
signature_def_map={
tf.compat.v1.saved_model.signature_constants
.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
signature_def
},
main_op=init_op)
builder.save()
if __name__ == '__main__':
app.run(main)
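A hedged sketch of consuming the SavedModel exported by the script above with the TF1 loader. The tensor names are inferred from the placeholder and identity names in the script ('input_image', 'input_scales', 'global_descriptors') and should be verified on an actual export; the export directory and input values are placeholders:

```python
# Sketch: load the exported global-feature model and run it in a TF1 session.
import numpy as np
import tensorflow as tf

export_path = '/tmp/delf_global_model'  # Hypothetical export directory.
with tf.Graph().as_default() as g, tf.compat.v1.Session(graph=g) as sess:
  tf.compat.v1.saved_model.loader.load(
      sess, [tf.compat.v1.saved_model.tag_constants.SERVING], export_path)
  image = np.zeros((321, 321, 3), dtype=np.uint8)  # Placeholder input image.
  descriptors = sess.run(
      'global_descriptors:0',
      feed_dict={
          'input_image:0': image,
          'input_scales:0': [0.7071, 1.0, 1.4142],
      })
  print(descriptors.shape)  # [num_scales, descriptor_dim] per the docstring.
```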
@@ -42,67 +42,39 @@ flags.DEFINE_boolean('block3_strides', False,
flags.DEFINE_float('iou', 1.0, 'IOU for non-max suppression.')
-def _build_tensor_info(tensor_dict):
-  """Replace the dict's value by the tensor info.
-  Args:
-    tensor_dict: A dictionary contains <string, tensor>.
-  Returns:
-    dict: New dictionary contains <string, tensor_info>.
-  """
-  return {
-      k: tf.compat.v1.saved_model.utils.build_tensor_info(t)
-      for k, t in tensor_dict.items()
-  }
-def main(argv):
-  if len(argv) > 1:
-    raise app.UsageError('Too many command-line arguments.')
-  export_path = FLAGS.export_path
-  if os.path.exists(export_path):
-    raise ValueError('Export_path already exists.')
-  with tf.Graph().as_default() as g, tf.compat.v1.Session(graph=g) as sess:
-    # Setup the DELF model for extraction.
-    model = delf_model.Delf(block3_strides=FLAGS.block3_strides, name='DELF')
-    # Initial forward pass to build model.
-    images = tf.zeros((1, 321, 321, 3), dtype=tf.float32)
-    model(images)
-    stride_factor = 2.0 if FLAGS.block3_strides else 1.0
-    # Setup the multiscale keypoint extraction.
-    input_image = tf.compat.v1.placeholder(
-        tf.uint8, shape=(None, None, 3), name='input_image')
-    input_abs_thres = tf.compat.v1.placeholder(
-        tf.float32, shape=(), name='input_abs_thres')
-    input_scales = tf.compat.v1.placeholder(
-        tf.float32, shape=[None], name='input_scales')
-    input_max_feature_num = tf.compat.v1.placeholder(
-        tf.int32, shape=(), name='input_max_feature_num')
-    extracted_features = export_model_utils.ExtractLocalFeatures(
-        input_image, input_scales, input_max_feature_num, input_abs_thres,
-        FLAGS.iou, lambda x: model(x, training=False), stride_factor)
-    # Load the weights.
-    checkpoint_path = FLAGS.ckpt_path
-    model.load_weights(checkpoint_path)
-    print('Checkpoint loaded from ', checkpoint_path)
-    named_input_tensors = {
-        'input_image': input_image,
-        'input_scales': input_scales,
-        'input_abs_thres': input_abs_thres,
-        'input_max_feature_num': input_max_feature_num,
-    }
-    # Outputs to the exported model.
+class _ExtractModule(tf.Module):
+  """Helper module to build and save DELF model."""
+  def __init__(self, block3_strides, iou):
+    """Initialization of DELF model.
+    Args:
+      block3_strides: bool, whether to add strides to the output of block3.
+      iou: IOU for non-max suppression.
+    """
+    self._stride_factor = 2.0 if block3_strides else 1.0
+    self._iou = iou
+    # Setup the DELF model for extraction.
+    self._model = delf_model.Delf(
+        block3_strides=block3_strides, name='DELF')
+  def LoadWeights(self, checkpoint_path):
+    self._model.load_weights(checkpoint_path)
+  @tf.function(input_signature=[
+      tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image'),
+      tf.TensorSpec(shape=[None], dtype=tf.float32, name='input_scales'),
+      tf.TensorSpec(shape=(), dtype=tf.int32, name='input_max_feature_num'),
+      tf.TensorSpec(shape=(), dtype=tf.float32, name='input_abs_thres')
+  ])
+  def ExtractFeatures(self, input_image, input_scales, input_max_feature_num,
+                      input_abs_thres):
+    extracted_features = export_model_utils.ExtractLocalFeatures(
+        input_image, input_scales, input_max_feature_num, input_abs_thres,
+        self._iou, lambda x: self._model(x, training=False),
+        self._stride_factor)
    named_output_tensors = {}
    named_output_tensors['boxes'] = tf.identity(
        extracted_features[0], name='boxes')
@@ -112,25 +84,27 @@ def main(argv):
    named_output_tensors['scales'] = tf.identity(
        extracted_features[2], name='scales')
    named_output_tensors['scores'] = tf.identity(
        extracted_features[3], name='scores')
-    # Export the model.
-    signature_def = tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
-        inputs=_build_tensor_info(named_input_tensors),
-        outputs=_build_tensor_info(named_output_tensors))
-    print('Exporting trained model to:', export_path)
-    builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)
-    init_op = None
-    builder.add_meta_graph_and_variables(
-        sess, [tf.compat.v1.saved_model.tag_constants.SERVING],
-        signature_def_map={
-            tf.compat.v1.saved_model.signature_constants
-            .DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-                signature_def
-        },
-        main_op=init_op)
-    builder.save()
+    return named_output_tensors
+def main(argv):
+  if len(argv) > 1:
+    raise app.UsageError('Too many command-line arguments.')
+  export_path = FLAGS.export_path
+  if os.path.exists(export_path):
+    raise ValueError(f'Export_path {export_path} already exists. Please '
+                     'specify a different path or delete the existing one.')
+  module = _ExtractModule(FLAGS.block3_strides, FLAGS.iou)
+  # Load the weights.
+  checkpoint_path = FLAGS.ckpt_path
+  module.LoadWeights(checkpoint_path)
+  print('Checkpoint loaded from ', checkpoint_path)
+  # Save the module
+  tf.saved_model.save(module, export_path)
if __name__ == '__main__':
...
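A hedged sketch of reloading the module saved above with `tf.saved_model.save`. The restored object exposes the `ExtractFeatures` tf.function with the input signature defined in `_ExtractModule`; the path, image, and threshold values below are placeholders:

```python
# Sketch: reload the exported local-feature module and run it eagerly.
import tensorflow as tf

delf = tf.saved_model.load('/tmp/delf_local_model')   # Hypothetical path.
image = tf.zeros((321, 321, 3), dtype=tf.uint8)       # Placeholder image.
outputs = delf.ExtractFeatures(
    image,
    tf.constant([0.7071, 1.0, 1.4142], dtype=tf.float32),  # input_scales
    tf.constant(1000, dtype=tf.int32),                      # max_feature_num
    tf.constant(100.0, dtype=tf.float32))                   # abs_thres
print(outputs['boxes'].shape, outputs['scores'].shape)
```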
@@ -142,20 +142,21 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
  keep_going = lambda j, b, f, scales, scores: tf.less(j, num_scales)
  (_, output_boxes, output_features, output_scales,
-   output_scores) = tf.while_loop(
-       cond=keep_going,
-       body=_ProcessSingleScale,
-       loop_vars=[
-           i, output_boxes, output_features, output_scales, output_scores
-       ],
-       shape_invariants=[
-           i.get_shape(),
-           tf.TensorShape([None, 4]),
-           tf.TensorShape([None, feature_depth]),
-           tf.TensorShape([None]),
-           tf.TensorShape([None])
-       ],
-       back_prop=False)
+   output_scores) = tf.nest.map_structure(
+       tf.stop_gradient,
+       tf.while_loop(
+           cond=keep_going,
+           body=_ProcessSingleScale,
+           loop_vars=[
+               i, output_boxes, output_features, output_scales, output_scores
+           ],
+           shape_invariants=[
+               i.get_shape(),
+               tf.TensorShape([None, 4]),
+               tf.TensorShape([None, feature_depth]),
+               tf.TensorShape([None]),
+               tf.TensorShape([None])
+           ]))
  feature_boxes = box_list.BoxList(output_boxes)
  feature_boxes.add_field('features', output_features)
@@ -169,3 +170,109 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
  return final_boxes.get(), final_boxes.get_field(
      'features'), final_boxes.get_field('scales'), tf.expand_dims(
          final_boxes.get_field('scores'), 1)
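The change above replaces the graph-mode-only `back_prop=False` argument with an explicit `tf.stop_gradient` over every loop output. A minimal standalone sketch of the same pattern, detached from DELF:

```python
# Minimal illustration: run a tf.while_loop, then block gradients through all
# of its outputs with tf.nest.map_structure(tf.stop_gradient, ...).
import tensorflow as tf

def _body(i, acc):
  # Append the current index (as float) to the accumulator.
  return i + 1, tf.concat([acc, tf.fill([1], tf.cast(i, tf.float32))], axis=0)

i0 = tf.constant(0)
acc0 = tf.zeros([0], dtype=tf.float32)
i_final, acc_final = tf.nest.map_structure(
    tf.stop_gradient,
    tf.while_loop(
        cond=lambda i, acc: tf.less(i, 5),
        body=_body,
        loop_vars=[i0, acc0],
        shape_invariants=[i0.get_shape(), tf.TensorShape([None])]))
```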
def ExtractGlobalFeatures(image,
image_scales,
model_fn,
multi_scale_pool_type='None',
normalize_global_descriptor=False):
"""Extract global features for input image.
Args:
image: image tensor of type tf.uint8 with shape [h, w, channels].
image_scales: 1D float tensor which contains float scales used for image
pyramid construction.
model_fn: model function. Follows the signature:
* Args:
* `images`: Image tensor which is re-scaled.
* Returns:
* `global_descriptors`: Global descriptors for input images.
multi_scale_pool_type: If set, the global descriptor of each scale is pooled
and a 1D global descriptor is returned.
normalize_global_descriptor: If True, output global descriptors are
L2-normalized.
Returns:
global_descriptors: If `multi_scale_pool_type` is 'None', returns a [S, D]
float tensor. S is the number of scales, and D the global descriptor
dimensionality. Each D-dimensional entry is a global descriptor, which may
be L2-normalized depending on `normalize_global_descriptor`. If
`multi_scale_pool_type` is not 'None', returns a [D] float tensor with the
pooled global descriptor.
"""
original_image_shape_float = tf.gather(
tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])
image_tensor = gld.NormalizeImages(
image, pixel_value_offset=128.0, pixel_value_scale=128.0)
image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')
def _ProcessSingleScale(scale_index, global_descriptors=None):
"""Resizes the image and runs feature extraction.
This function will be passed into tf.while_loop() and be called
repeatedly. We get the current scale by image_scales[scale_index], and
run image resizing / feature extraction. In the end, we concat the
previous global descriptors with current descriptor as the output.
Args:
scale_index: A valid index in image_scales.
global_descriptors: Global descriptor tensor with the shape of [S, D]. If
None, no previous global descriptors are used, and the output will be of
shape [1, D].
Returns:
scale_index: The next scale index for processing.
global_descriptors: A concatenated global descriptor tensor with the shape
of [S+1, D].
"""
scale = tf.gather(image_scales, scale_index)
new_image_size = tf.dtypes.cast(
tf.round(original_image_shape_float * scale), tf.int32)
resized_image = tf.image.resize(image_tensor, new_image_size)
global_descriptor = model_fn(resized_image)
if global_descriptors is None:
global_descriptors = global_descriptor
else:
global_descriptors = tf.concat([global_descriptors, global_descriptor], 0)
return scale_index + 1, global_descriptors
# Process the first scale separately, the following scales will reuse the
# graph variables.
(_, output_global) = _ProcessSingleScale(0)
i = tf.constant(1, dtype=tf.int32)
num_scales = tf.shape(image_scales)[0]
keep_going = lambda j, g: tf.less(j, num_scales)
(_, output_global) = tf.nest.map_structure(
tf.stop_gradient,
tf.while_loop(
cond=keep_going,
body=_ProcessSingleScale,
loop_vars=[i, output_global],
shape_invariants=[i.get_shape(),
tf.TensorShape([None, None])]))
normalization_axis = 1
if multi_scale_pool_type == 'average':
output_global = tf.reduce_mean(
output_global,
axis=0,
keepdims=False,
name='multi_scale_average_pooling')
normalization_axis = 0
elif multi_scale_pool_type == 'sum':
output_global = tf.reduce_sum(
output_global, axis=0, keepdims=False, name='multi_scale_sum_pooling')
normalization_axis = 0
if normalize_global_descriptor:
output_global = tf.nn.l2_normalize(
output_global, axis=normalization_axis, name='l2_normalization')
return output_global
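A hedged usage sketch of `ExtractGlobalFeatures`: the backbone lambda mirrors the one passed in by export_global_model.py, while the image shape and scale values below are illustrative:

```python
# Sketch: compute an average-pooled, L2-normalized global descriptor.
import tensorflow as tf
from delf.python.training.model import delf_model

model = delf_model.Delf(block3_strides=False, name='DELF')
model(tf.zeros((1, 321, 321, 3), dtype=tf.float32))  # Build the variables.

image = tf.zeros((640, 480, 3), dtype=tf.uint8)      # Placeholder uint8 image.
descriptor = ExtractGlobalFeatures(
    image,
    tf.constant([0.7071, 1.0, 1.4142]),
    lambda x: model.backbone(x, training=False),
    multi_scale_pool_type='average',
    normalize_global_descriptor=True)
# With pooling enabled the result is a single 1D global descriptor.
```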
@@ -22,9 +22,14 @@ from __future__ import division
from __future__ import print_function
import functools
+import os
+import tempfile
+from absl import logging
+import h5py
import tensorflow as tf
layers = tf.keras.layers
@@ -284,8 +289,8 @@ class ResNet50(tf.keras.Model):
    else:
      self.global_pooling = None
-  def call(self, inputs, training=True, intermediates_dict=None):
-    """Call the ResNet50 model.
+  def build_call(self, inputs, training=True, intermediates_dict=None):
+    """Building the ResNet50 model.
    Args:
      inputs: Images to compute features for.
@@ -356,3 +361,79 @@ class ResNet50(tf.keras.Model):
      return self.global_pooling(x)
    else:
      return x
def call(self, inputs, training=True, intermediates_dict=None):
"""Call the ResNet50 model.
Args:
inputs: Images to compute features for.
training: Whether model is in training phase.
intermediates_dict: `None` or dictionary. If not None, accumulate feature
        maps from intermediate blocks into the dictionary.
Returns:
Tensor with featuremap.
"""
return self.build_call(inputs, training, intermediates_dict)
def restore_weights(self, filepath):
"""Load pretrained weights.
This function loads a .h5 file from the filepath with saved model weights
and assigns them to the model.
Args:
filepath: String, path to the .h5 file
Raises:
ValueError: if the file referenced by `filepath` does not exist.
"""
if not tf.io.gfile.exists(filepath):
      raise ValueError('Unable to load weights from %s. You must provide a '
                       'valid file.' % (filepath))
# Create a local copy of the weights file for h5py to be able to read it.
local_filename = os.path.basename(filepath)
tmp_filename = os.path.join(tempfile.gettempdir(), local_filename)
tf.io.gfile.copy(filepath, tmp_filename, overwrite=True)
# Load the content of the weights file.
f = h5py.File(tmp_filename, mode='r')
saved_layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
try:
# Iterate through all the layers assuming the max `depth` is 2.
for layer in self.layers:
if hasattr(layer, 'layers'):
for inlayer in layer.layers:
# Make sure the weights are in the saved model, and that we are in
# the innermost layer.
if inlayer.name not in saved_layer_names:
            raise ValueError('Layer %s absent from the pretrained weights. '
                             'Unable to load its weights.' % (inlayer.name))
if hasattr(inlayer, 'layers'):
            raise ValueError('Layer %s is not a depth 2 layer. Unable to load '
                             'its weights.' % (inlayer.name))
# Assign the weights in the current layer.
g = f[inlayer.name]
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
weight_values = [g[weight_name] for weight_name in weight_names]
print('Setting the weights for layer %s' % (inlayer.name))
inlayer.set_weights(weight_values)
finally:
# Clean up the temporary file.
tf.io.gfile.remove(tmp_filename)
def log_weights(self):
"""Log backbone weights."""
logging.info('Logging backbone weights')
logging.info('------------------------')
for layer in self.layers:
if hasattr(layer, 'layers'):
for inlayer in layer.layers:
logging.info('Weights for layer: %s, inlayer % s', layer.name,
inlayer.name)
weights = inlayer.get_weights()
logging.info(weights)
else:
logging.info('Layer %s does not have inner layers.',
layer.name)
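A hedged sketch of using the new `restore_weights` and `log_weights` helpers to seed the DELF backbone from an ImageNet-pretrained ResNet50, mirroring the flag-driven flow added to train.py later in this commit. The `.h5` path is hypothetical and must follow the Keras layer-name layout the loader expects:

```python
# Sketch: restore ImageNet weights into the ResNet50 backbone of Delf.
import tensorflow as tf
from delf.python.training.model import delf_model

model = delf_model.Delf(block3_strides=True, name='DELF')
model(tf.zeros((1, 321, 321, 3), dtype=tf.float32))  # Create variables first.
model.backbone.restore_weights('/tmp/resnet50_imagenet_weights.h5')
model.backbone.log_weights()  # Optional: dump restored weights to the logs.
```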
@@ -43,13 +43,20 @@ flags.DEFINE_string('train_file_pattern', '/tmp/data/train*',
                    'File pattern of training dataset files.')
flags.DEFINE_string('validation_file_pattern', '/tmp/data/validation*',
                    'File pattern of validation dataset files.')
+flags.DEFINE_enum(
+    'dataset_version', 'gld_v1', ['gld_v1', 'gld_v2', 'gld_v2_clean'],
+    'Google Landmarks dataset version, used to determine the'
+    'number of classes.')
flags.DEFINE_integer('seed', 0, 'Seed to training dataset.')
-flags.DEFINE_float('initial_lr', 0.001, 'Initial learning rate.')
+flags.DEFINE_float('initial_lr', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 32, 'Global batch size.')
flags.DEFINE_integer('max_iters', 500000, 'Maximum iterations.')
-flags.DEFINE_boolean('block3_strides', False, 'Whether to use block3_strides.')
+flags.DEFINE_boolean('block3_strides', True, 'Whether to use block3_strides.')
flags.DEFINE_boolean('use_augmentation', True,
                     'Whether to use ImageNet style augmentation.')
+flags.DEFINE_string(
+    'imagenet_checkpoint', None,
+    'ImageNet checkpoint for ResNet backbone. If None, no checkpoint is used.')
def _record_accuracy(metric, logits, labels):
@@ -60,6 +67,10 @@ def _record_accuracy(metric, logits, labels):
def _attention_summaries(scores, global_step):
  """Record statistics of the attention score."""
+  tf.summary.image(
+      'batch_attention',
+      scores / tf.reduce_max(scores + 1e-3),
+      step=global_step)
  tf.summary.scalar('attention/max', tf.reduce_max(scores), step=global_step)
  tf.summary.scalar('attention/min', tf.reduce_min(scores), step=global_step)
  tf.summary.scalar('attention/mean', tf.reduce_mean(scores), step=global_step)
@@ -120,7 +131,7 @@ def main(argv):
  max_iters = FLAGS.max_iters
  global_batch_size = FLAGS.batch_size
  image_size = 321
-  num_eval = 1000
+  num_eval_batches = int(50000 / global_batch_size)
  report_interval = 100
  eval_interval = 1000
  save_interval = 20000
@@ -130,15 +141,16 @@ def main(argv):
  clip_val = tf.constant(10.0)
  if FLAGS.debug:
+    tf.config.run_functions_eagerly(True)
    global_batch_size = 4
-    max_iters = 4
-    num_eval = 1
+    max_iters = 100
+    num_eval_batches = 1
    save_interval = 1
    report_interval = 1
-  # TODO(andrearaujo): Using placeholder, replace with actual value using
-  # GoogleLandmarksInfo() from datasets/googlelandmarks.py.
-  num_classes = 14951
+  # Determine the number of classes based on the version of the dataset.
+  gld_info = gld.GoogleLandmarksInfo()
+  num_classes = gld_info.num_classes[FLAGS.dataset_version]
  # ------------------------------------------------------------
  # Create the distributed train/validation sets.
@@ -155,11 +167,12 @@ def main(argv):
      augmentation=False,
      seed=FLAGS.seed)
-  train_iterator = strategy.make_dataset_iterator(train_dataset)
-  validation_iterator = strategy.make_dataset_iterator(validation_dataset)
-  train_iterator.initialize()
-  validation_iterator.initialize()
+  train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
+  validation_dist_dataset = strategy.experimental_distribute_dataset(
+      validation_dataset)
+  train_iter = iter(train_dist_dataset)
+  validation_iter = iter(validation_dist_dataset)
  # Create a checkpoint directory to store the checkpoints.
  checkpoint_prefix = os.path.join(FLAGS.logdir, 'delf_tf2-ckpt')
@@ -215,11 +228,14 @@ def main(argv):
      labels = tf.clip_by_value(labels, 0, model.num_classes)
      global_step = optimizer.iterations
+      tf.summary.image('batch_images', (images + 1.0) / 2.0, step=global_step)
      tf.summary.scalar(
          'image_range/max', tf.reduce_max(images), step=global_step)
      tf.summary.scalar(
          'image_range/min', tf.reduce_min(images), step=global_step)
+      # TODO(andrearaujo): we should try to unify the backprop into a single
+      # function, instead of applying once to descriptor then to attention.
      def _backprop_loss(tape, loss, weights):
        """Backpropogate losses using clipped gradients.
@@ -340,12 +356,25 @@ def main(argv):
  with tf.summary.record_if(
      tf.math.equal(0, optimizer.iterations % report_interval)):
+    # TODO(dananghel): try to load pretrained weights at backbone creation.
+    # Load pretrained weights for ResNet50 trained on ImageNet.
+    if FLAGS.imagenet_checkpoint is not None:
+      logging.info('Attempting to load ImageNet pretrained weights.')
+      input_batch = next(train_iter)
+      _, _ = distributed_train_step(input_batch)
+      model.backbone.restore_weights(FLAGS.imagenet_checkpoint)
+      logging.info('Done.')
+    else:
+      logging.info('Skip loading ImageNet pretrained weights.')
+    if FLAGS.debug:
+      model.backbone.log_weights()
    global_step_value = optimizer.iterations.numpy()
    while global_step_value < max_iters:
      # input_batch : images(b, h, w, c), labels(b,).
      try:
-        input_batch = train_iterator.get_next()
+        input_batch = next(train_iter)
      except tf.errors.OutOfRangeError:
        # Break if we run out of data in the dataset.
        logging.info('Stopping training at global step %d, no more data',
@@ -388,9 +417,9 @@ def main(argv):
      # Validate once in {eval_interval*n, n \in N} steps.
      if global_step_value % eval_interval == 0:
-        for i in range(num_eval):
+        for i in range(num_eval_batches):
          try:
-            validation_batch = validation_iterator.get_next()
+            validation_batch = next(validation_iter)
            desc_validation_result, attn_validation_result = (
                distributed_validation_step(validation_batch))
          except tf.errors.OutOfRangeError:
@@ -412,13 +441,17 @@ def main(argv):
            print(' : attn:', attn_validation_result.numpy())
      # Save checkpoint once (each save_interval*n, n \in N) steps.
+      # TODO(andrearaujo): save only in one of the two ways. They are
+      # identical, the only difference is that the manager adds some extra
+      # prefixes and variables (eg, optimizer variables).
      if global_step_value % save_interval == 0:
        save_path = manager.save()
-        logging.info('Saved({global_step_value}) at %s', save_path)
+        logging.info('Saved (%d) at %s', global_step_value, save_path)
        file_path = '%s/delf_weights' % FLAGS.logdir
        model.save_weights(file_path, save_format='tf')
-        logging.info('Saved weights({global_step_value}) at %s', file_path)
+        logging.info('Saved weights (%d) at %s', global_step_value,
+                     file_path)
      # Reset metrics for next step.
      desc_train_accuracy.reset_states()
...
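The training loop above switches from the removed `make_dataset_iterator()`/`initialize()` API to `experimental_distribute_dataset` plus a plain Python iterator. A small standalone sketch of that pattern, independent of the DELF code:

```python
# Sketch of the tf.distribute dataset iteration pattern used above.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
dataset = tf.data.Dataset.range(8).batch(4)
dist_dataset = strategy.experimental_distribute_dataset(dataset)
dist_iter = iter(dist_dataset)

while True:
  try:
    batch = next(dist_iter)  # Mirrors `input_batch = next(train_iter)` above.
  except (StopIteration, tf.errors.OutOfRangeError):
    break
  # A distributed train step would normally consume `batch` here; printing is
  # enough for the sketch.
  print(batch)
```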
@@ -149,7 +149,7 @@ class UvfAgentCore(object):
    error = tf.square(actions - pred_actions)
    spec_range = (self._action_spec.maximum - self._action_spec.minimum) / 2
-    normalized_error = error / tf.constant(spec_range) ** 2
+    normalized_error = tf.cast(error, tf.float64) / tf.constant(spec_range) ** 2
    return -normalized_error
  @gin.configurable('uvf_add_noise_fn')
...
@@ -4,20 +4,23 @@
# Neural Programmer
-Implementation of the Neural Programmer model described in [paper](https://openreview.net/pdf?id=ry2YOrcge)
+Implementation of the Neural Programmer model as described in this [paper](https://openreview.net/pdf?id=ry2YOrcge).
-Download and extract the data from [dropbox](https://www.dropbox.com/s/9tvtcv6lmy51zfw/data.zip?dl=0). Change the ``data_dir FLAG`` to the location of the data.
+Download and extract the data from the [WikiTableQuestions](https://ppasupat.github.io/WikiTableQuestions/) site. The dataset contains
+11321, 2831, and 4344 examples for training, development, and testing respectively. We use their tokenization, number and date pre-processing. Please note that the above paper used the [initial release](https://github.com/ppasupat/WikiTableQuestions/releases/tag/v0.2) for training, development and testing.
+Change the `data_dir FLAG` to the location of the data.
### Training
-``python neural_programmer.py``
+Run `python neural_programmer.py`
-The models are written to FLAGS.output_dir
+The models are written to `FLAGS.output_dir`.
### Testing
-``python neural_programmer.py --evaluator_job=True``
+Run `python neural_programmer.py --evaluator_job=True`
-The models are loaded from ``FLAGS.output_dir``. The evaluation is done on development data.
+The models are loaded from `FLAGS.output_dir`. The evaluation is done on development data.
-In case of errors because of encoding, add ``"# -*- coding: utf-8 -*-"`` as the first line in ``wiki_data.py``
+In case of errors because of encoding, add `"# -*- coding: utf-8 -*-"` as the first line in `wiki_data.py`
Maintained by Arvind Neelakantan (arvind2505)
...@@ -2,17 +2,16 @@ ...@@ -2,17 +2,16 @@
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) ![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Tensorflow Object Detection API # Tensorflow Object Detection API
Creating accurate machine learning models capable of localizing and identifying Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision. multiple objects in a single image remains a core challenge in computer vision.
The TensorFlow Object Detection API is an open source framework built on top of The TensorFlow Object Detection API is an open source framework built on top of
TensorFlow that makes it easy to construct, train and deploy object detection TensorFlow that makes it easy to construct, train and deploy object detection
models. At Google we’ve certainly found this codebase to be useful for our models. At Google we’ve certainly found this codebase to be useful for our
computer vision needs, and we hope that you will as well. computer vision needs, and we hope that you will as well. <p align="center">
<p align="center"> <img src="g3doc/img/kites_detections_output.jpg" width=676 height=450> </p>
<img src="g3doc/img/kites_detections_output.jpg" width=676 height=450>
</p>
Contributions to the codebase are welcome and we would love to hear back from Contributions to the codebase are welcome and we would love to hear back from
you if you find this API useful. Finally if you use the Tensorflow Object you if you find this API useful. Finally if you use the Tensorflow Object
Detection API for a research publication, please consider citing: Detection API for a research publication, please consider citing:
``` ```
...@@ -20,8 +19,8 @@ Detection API for a research publication, please consider citing: ...@@ -20,8 +19,8 @@ Detection API for a research publication, please consider citing:
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z, Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
Song Y, Guadarrama S, Murphy K, CVPR 2017 Song Y, Guadarrama S, Murphy K, CVPR 2017
``` ```
\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\] \[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
<p align="center">
  <img src="g3doc/img/tf-od-api-logo.png" width=140 height=195>

...@@ -29,63 +28,65 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.go

## Maintainers

| Name           | GitHub                                        |
| -------------- | --------------------------------------------- |
| Jonathan Huang | [jch1](https://github.com/jch1)               |
| Vivek Rathod   | [tombstone](https://github.com/tombstone)     |
| Ronny Votel    | [ronnyvotel](https://github.com/ronnyvotel)   |
| Derek Chow     | [derekjchow](https://github.com/derekjchow)   |
| Chen Sun       | [jesu9](https://github.com/jesu9)             |
| Menglong Zhu   | [dreamdragon](https://github.com/dreamdragon) |
| Alireza Fathi  | [afathi3](https://github.com/afathi3)         |
| Zhichao Lu     | [pkulzc](https://github.com/pkulzc)           |
## Table of contents

Setup:

* <a href='g3doc/installation.md'>Installation</a><br>

Quick Start:

* <a href='object_detection_tutorial.ipynb'>
  Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
* <a href="g3doc/running_pets.md">Quick Start: Training a pet detector</a><br>

Customizing a Pipeline:

* <a href='g3doc/configuring_jobs.md'>
  Configuring an object detection pipeline</a><br>
* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>

Running:

* <a href='g3doc/running_locally.md'>Running locally</a><br>
* <a href='g3doc/running_on_cloud.md'>Running on the cloud</a><br>

Extras:

* <a href='g3doc/detection_model_zoo.md'>Tensorflow detection model zoo</a><br>
* <a href='g3doc/exporting_models.md'>
  Exporting a trained model for inference</a><br>
* <a href='g3doc/tpu_exporters.md'>
  Exporting a trained model for TPU inference</a><br>
* <a href='g3doc/defining_your_own_model.md'>
  Defining your own model architecture</a><br>
* <a href='g3doc/using_your_own_dataset.md'>
  Bringing in your own dataset</a><br>
* <a href='g3doc/evaluation_protocols.md'>
  Supported object detection evaluation protocols</a><br>
* <a href='g3doc/oid_inference_and_evaluation.md'>
  Inference and evaluation on the Open Images dataset</a><br>
* <a href='g3doc/instance_segmentation.md'>
  Run an instance segmentation model</a><br>
* <a href='g3doc/challenge_evaluation.md'>
  Run the evaluation for the Open Images Challenge 2018/2019</a><br>
* <a href='g3doc/tpu_compatibility.md'>
  TPU compatible detection pipelines</a><br>
* <a href='g3doc/running_on_mobile_tensorflowlite.md'>
  Running object detection on mobile devices with TensorFlow Lite</a><br>
* <a href='g3doc/context_rcnn.md'>
Context R-CNN documentation for data preparation, training, and export</a><br>
## Getting Help

...@@ -98,78 +99,107 @@ tensorflow/models GitHub
[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
issue name with "object_detection".

Please check [FAQ](g3doc/faq.md) for frequently asked questions before reporting
an issue.

## Release information
### June 17th, 2020
We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
uses attention to incorporate contextual information from images (e.g. from
temporally nearby frames taken by a static camera) in order to improve accuracy.
Importantly, these contextual images need not be labeled.
* When applied to a challenging wildlife detection dataset ([Snapshot Serengeti](http://lila.science/datasets/snapshot-serengeti)),
Context R-CNN with context from up to a month of images outperforms a
single-frame baseline by 17.9% mAP, and outperforms S3D (a 3d convolution
based baseline) by 11.2% mAP.
* Context R-CNN leverages temporal context from the unlabeled frames of a
novel camera deployment to improve performance at that camera, boosting
model generalizability.
Read about Context R-CNN on the Google AI blog [here](https://ai.googleblog.com/2020/06/leveraging-temporal-context-for-object.html).
We have provided code for generating data with associated context
[here](g3doc/context_rcnn.md), and a sample config for a Context R-CNN
model [here](samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config).
Snapshot Serengeti-trained Faster R-CNN and Context R-CNN models can be found in
the [model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md#snapshot-serengeti-camera-trap-trained-models).
A colab demonstrating Context R-CNN is provided
[here](colab_tutorials/context_rcnn_tutorial.ipynb).
<b>Thanks to contributors</b>: Sara Beery, Jonathan Huang, Guanhang Wu, Vivek
Rathod, Ronny Votel, Zhichao Lu, David Ross, Pietro Perona, Tanya Birch, and
the Wildlife Insights AI Team.
### May 19th, 2020

We have released [MobileDets](https://arxiv.org/abs/2004.14525), a set of
high-performance models for mobile CPUs, DSPs and EdgeTPUs.

* MobileDets outperform MobileNetV3+SSDLite by 1.7 mAP at comparable mobile
  CPU inference latencies. MobileDets also outperform MobileNetV2+SSDLite by
  1.9 mAP on mobile CPUs, 3.7 mAP on EdgeTPUs and 3.4 mAP on DSPs while
  running equally fast. MobileDets also offer up to 2x speedup over MnasFPN on
  EdgeTPUs and DSPs.

For each of the three hardware platforms we have released model definition,
model checkpoints trained on the COCO14 dataset and converted TFLite models in
fp32 and/or uint8.

<b>Thanks to contributors</b>: Yunyang Xiong, Hanxiao Liu, Suyog Gupta, Berkin
Akin, Gabriel Bender, Pieter-Jan Kindermans, Mingxing Tan, Vikas Singh, Bo Chen,
Quoc Le, Zhichao Lu.
### May 7th, 2020

We have released a mobile model with the
[MnasFPN head](https://arxiv.org/abs/1912.01106).

* MnasFPN with MobileNet-V2 backbone is the most accurate (26.6 mAP at 183ms
  on Pixel 1) mobile detection model we have released to date. With
  depth-multiplier, MnasFPN with MobileNet-V2 backbone is 1.8 mAP higher than
  MobileNet-V3-Large with SSDLite (23.8 mAP vs 22.0 mAP) at similar latency
  (120ms) on Pixel 1.

We have released model definition, model checkpoints trained on the COCO14
dataset and a converted TFLite model.

<b>Thanks to contributors</b>: Bo Chen, Golnaz Ghiasi, Hanxiao Liu, Tsung-Yi
Lin, Dmitry Kalenichenko, Hartwig Adam, Quoc Le, Zhichao Lu, Jonathan Huang, Hao
Xu.
### Nov 13th, 2019

We have released the MobileNetEdgeTPU SSDLite model.

* SSDLite with MobileNetEdgeTPU backbone, which achieves 10% mAP higher than
  MobileNetV2 SSDLite (24.3 mAP vs 22 mAP) on a Google Pixel4 at comparable
  latency (6.6ms vs 6.8ms).

Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.

<b>Thanks to contributors</b>: Yunyang Xiong, Bo Chen, Suyog Gupta, Hanxiao Liu,
Gabriel Bender, Mingxing Tan, Berkin Akin, Zhichao Lu, Quoc Le
### Oct 15th, 2019

We have released two MobileNet V3 SSDLite models (presented in
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244)).

* SSDLite with MobileNet-V3-Large backbone, which is 27% faster than Mobilenet
  V2 SSDLite (119ms vs 162ms) on a Google Pixel phone CPU at the same mAP.
* SSDLite with MobileNet-V3-Small backbone, which is 37% faster than MnasNet
  SSDLite reduced with depth-multiplier (43ms vs 68ms) at the same mAP.

Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.

<b>Thanks to contributors</b>: Bo Chen, Zhichao Lu, Vivek Rathod, Jonathan Huang
### July 1st, 2019

We have released an updated set of utils and an updated
...@@ -177,28 +207,30 @@ We have released an updated set of utils and an updated
[Open Images Challenge 2019](https://storage.googleapis.com/openimages/web/challenge2019.html)!

The Instance Segmentation metric for
[Open Images V5](https://storage.googleapis.com/openimages/web/index.html) and
[Challenge 2019](https://storage.googleapis.com/openimages/web/challenge2019.html)
is part of this release. Check out
[the metric description](https://storage.googleapis.com/openimages/web/evaluation.html#instance_segmentation_eval)
on the Open Images website.

<b>Thanks to contributors</b>: Alina Kuznetsova, Rodrigo Benenson
### Feb 11, 2019

We have released detection models trained on the Open Images Dataset V4 in our
detection model zoo, including

* Faster R-CNN detector with Inception Resnet V2 feature extractor
* SSD detector with MobileNet V2 feature extractor
* SSD detector with ResNet 101 FPN feature extractor (aka RetinaNet-101)

<b>Thanks to contributors</b>: Alina Kuznetsova, Yinxiao Li
### Sep 17, 2018

We have released Faster R-CNN detectors with ResNet-50 / ResNet-101 feature
extractors trained on the
[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
The models are trained on the training split of the iNaturalist data for 4M
iterations, and they achieve 55% and 58% mean AP@.5 over 2854 classes,
respectively.
For more details please refer to this [paper](https://arxiv.org/abs/1707.06642).
...@@ -210,42 +242,59 @@ For more details please refer to this [paper](https://arxiv.org/abs/1707.06642).

There are many new updates in this release, extending the functionality and
capability of the API:

* Moving from slim-based training to
  [Estimator](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator)-based
  training.
* Support for [RetinaNet](https://arxiv.org/abs/1708.02002), and a
  [MobileNet](https://ai.googleblog.com/2017/06/mobilenets-open-source-models-for.html)
  adaptation of RetinaNet.
* A novel SSD-based architecture called the
  [Pooling Pyramid Network](https://arxiv.org/abs/1807.03284) (PPN).
* Releasing several [TPU](https://cloud.google.com/tpu/)-compatible models.
  These can be found in the `samples/configs/` directory with a comment in the
  pipeline configuration files indicating TPU compatibility.
* Support for quantized training.
* Updated documentation for new binaries, Cloud training, and
  [Tensorflow Lite](https://www.tensorflow.org/mobile/tflite/).

See also our
[expanded announcement blogpost](https://ai.googleblog.com/2018/07/accelerated-training-and-inference-with.html)
and accompanying tutorial at the
[TensorFlow blog](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193).

<b>Thanks to contributors</b>: Sara Robinson, Aakanksha Chowdhery, Derek Chow,
Pengchong Jin, Jonathan Huang, Vivek Rathod, Zhichao Lu, Ronny Votel
### June 25, 2018

Additional evaluation tools for the
[Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html)
are out. Check out our short tutorial on data preparation and running evaluation
[here](g3doc/challenge_evaluation.md)!

<b>Thanks to contributors</b>: Alina Kuznetsova
### June 5, 2018

We have released the implementation of evaluation metrics for both tracks of the
[Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html)
as a part of the Object Detection API - see the
[evaluation protocols](g3doc/evaluation_protocols.md) for more details.
Additionally, we have released a tool for hierarchical labels expansion for the
Open Images Challenge: check out
[oid_hierarchical_labels_expansion.py](dataset_tools/oid_hierarchical_labels_expansion.py).

<b>Thanks to contributors</b>: Alina Kuznetsova, Vittorio Ferrari, Jasper
Uijlings
### April 30, 2018

We have released a Faster R-CNN detector with ResNet-101 feature extractor
trained on [AVA](https://research.google.com/ava/) v2.1. Compared with other
commonly used object detectors, it changes the action classification loss
function to per-class Sigmoid loss to handle boxes with multiple labels. The
model is trained on the training split of AVA v2.1 for 1.5M iterations and
achieves a mean AP of 11.25% over 60 classes on the validation split of AVA v2.1.
For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).

<b>Thanks to contributors</b>: Chen Sun, David Ross

...@@ -255,84 +304,94 @@ For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU
(200ms vs. 270ms) at the same accuracy. Along with the model definition, we are
also releasing a model checkpoint trained on the COCO dataset.

<b>Thanks to contributors</b>: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek
Rathod, Jonathan Huang
### February 9, 2018

We now support instance segmentation!! In this API update we support a number of
instance segmentation models similar to those discussed in the
[Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer
to [our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the
2017 Coco + Places Workshop. Refer to the section on
[Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for
instructions on how to configure a model that predicts masks in addition to
object bounding boxes.

<b>Thanks to contributors</b>: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny
Votel, Jonathan Huang
### November 17, 2017

As a part of the Open Images V3 release we have released:

* An implementation of the Open Images evaluation metric and the
  [protocol](g3doc/evaluation_protocols.md#open-images).
* Additional tools to separate inference of detection and evaluation (see
  [this tutorial](g3doc/oid_inference_and_evaluation.md)).
* A new detection model trained on the Open Images V2 data release (see
  [Open Images model](g3doc/detection_model_zoo.md#open-images-models)).

See more information on the
[Open Images website](https://github.com/openimages/dataset)!

<b>Thanks to contributors</b>: Stefan Popov, Alina Kuznetsova
### November 6, 2017

We have re-released faster versions of our (pre-trained) models in the
<a href='g3doc/detection_model_zoo.md'>model zoo</a>. In addition to what was
available before, we are also adding Faster R-CNN models trained on COCO with
Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN with
Resnet-101 model trained on the KITTI dataset.

<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow, Tal
Remez, Chen Sun.
### October 31, 2017

We have released a new state-of-the-art model for object detection using the
Faster-RCNN with the
[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This model
achieves mAP of 43.1% on the test-dev validation dataset for COCO, improving on
the best available model in the zoo by 6% in terms of absolute mAP.

<b>Thanks to contributors</b>: Barret Zoph, Vijay Vasudevan, Jonathon Shlens,
Quoc Le
### August 11, 2017

We have released an update to the
[Android Detect demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
which will now run models trained using the Tensorflow Object Detection API on
an Android device. By default, it currently runs a frozen SSD w/Mobilenet
detector trained on COCO, but we encourage you to try out other detection
models!

<b>Thanks to contributors</b>: Jonathan Huang, Andrew Harp
### June 15, 2017

In addition to our base Tensorflow detection model definitions, this release
includes:

* A selection of trainable detection models, including:
  * Single Shot Multibox Detector (SSD) with MobileNet,
  * SSD with Inception V2,
  * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
  * Faster RCNN with Resnet 101,
  * Faster RCNN with Inception Resnet v2
* Frozen weights (trained on the COCO dataset) for each of the above models to
  be used for out-of-the-box inference purposes.
* A [Jupyter notebook](colab_tutorials/object_detection_tutorial.ipynb) for
  performing out-of-the-box inference with one of our released models (a short
  inference sketch follows below).
* Convenient [local training](g3doc/running_locally.md) scripts as well as
  distributed training and evaluation pipelines via
  [Google Cloud](g3doc/running_on_cloud.md).

<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow, Chen
Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer,
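A minimal sketch of the off-the-shelf inference workflow mentioned in the
June 15, 2017 notes, assuming a frozen graph exported by this API: the
checkpoint path below is a placeholder, and the tensor names
(`image_tensor:0`, `detection_boxes:0`, etc.) are the conventional names in
exported graphs, so adjust both to your own export.

```python
import numpy as np
import tensorflow.compat.v1 as tf

# Placeholder path; point this at a frozen graph from the model zoo.
PATH_TO_FROZEN_GRAPH = 'ssd_mobilenet_v1_coco/frozen_inference_graph.pb'

detection_graph = tf.Graph()
with detection_graph.as_default():
  graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    graph_def.ParseFromString(fid.read())
  tf.import_graph_def(graph_def, name='')

with detection_graph.as_default(), tf.Session() as sess:
  # Dummy uint8 batch of shape [1, height, width, 3]; replace with a real image.
  image = np.zeros((1, 300, 300, 3), dtype=np.uint8)
  boxes, scores, classes, num = sess.run(
      ['detection_boxes:0', 'detection_scores:0',
       'detection_classes:0', 'num_detections:0'],
      feed_dict={'image_tensor:0': image})
```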
......
...@@ -16,6 +16,7 @@
"""Tests for box_predictor_builder."""

import unittest
import mock
import tensorflow.compat.v1 as tf

...@@ -25,8 +26,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import mask_rcnn_box_predictor
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version


@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):

  def test_box_predictor_calls_conv_argscope_fn(self):

...@@ -161,6 +164,7 @@ class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
    self.assertFalse(class_head._use_depthwise)


@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):

  def test_box_predictor_calls_conv_argscope_fn(self):

...@@ -357,6 +361,7 @@ class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):

@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):

  def test_box_predictor_builder_calls_fc_argscope_fn(self):

...@@ -537,6 +542,7 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
                    ._convolve_then_upsample)


@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class RfcnBoxPredictorBuilderTest(tf.test.TestCase):

  def test_box_predictor_calls_fc_argscope_fn(self):
......
...@@ -25,31 +25,34 @@ from six.moves import zip
import tensorflow.compat.v1 as tf

from object_detection.builders import calibration_builder
from object_detection.protos import calibration_pb2
from object_detection.utils import test_case


class CalibrationBuilderTest(test_case.TestCase):

  def test_tf_linear_interp1d_map(self):
    """Tests TF linear interpolation mapping to a single number."""
    def graph_fn():
      tf_x = tf.constant([0., 0.5, 1.])
      tf_y = tf.constant([0.5, 0.5, 0.5])
      new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
      tf_map_outputs = calibration_builder._tf_linear_interp1d(
          new_x, tf_x, tf_y)
      return tf_map_outputs
    tf_map_outputs_np = self.execute(graph_fn, [])
    self.assertAllClose(tf_map_outputs_np, [0.5, 0.5, 0.5, 0.5, 0.5])

  def test_tf_linear_interp1d_interpolate(self):
    """Tests TF 1d linear interpolation not mapping to a single number."""
    def graph_fn():
      tf_x = tf.constant([0., 0.5, 1.])
      tf_y = tf.constant([0.6, 0.7, 1.0])
      new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
      tf_interpolate_outputs = calibration_builder._tf_linear_interp1d(
          new_x, tf_x, tf_y)
      return tf_interpolate_outputs
    tf_interpolate_outputs_np = self.execute(graph_fn, [])
    self.assertAllClose(tf_interpolate_outputs_np, [0.6, 0.65, 0.7, 0.85, 1.])
  @staticmethod
  def _get_scipy_interp1d(new_x, x, y):

...@@ -59,12 +62,13 @@ class CalibrationBuilderTest(tf.test.TestCase):

  def _get_tf_interp1d(self, new_x, x, y):
    """Helper performing 1d linear interpolation using Tensorflow."""
    def graph_fn():
      tf_interp_outputs = calibration_builder._tf_linear_interp1d(
          tf.convert_to_tensor(new_x, dtype=tf.float32),
          tf.convert_to_tensor(x, dtype=tf.float32),
          tf.convert_to_tensor(y, dtype=tf.float32))
      return tf_interp_outputs
    np_tf_interp_outputs = self.execute(graph_fn, [])
    return np_tf_interp_outputs
  def test_tf_linear_interp1d_against_scipy_map(self):

...@@ -128,8 +132,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
    self._add_function_approximation_to_calibration_proto(
        calibration_config, class_agnostic_x, class_agnostic_y, class_id=None)

    def graph_fn():
      calibration_fn = calibration_builder.build(calibration_config)
      # batch_size = 2, num_classes = 2, num_anchors = 2.
      class_predictions_with_background = tf.constant(

...@@ -140,7 +143,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
      # Everything should map to 0.5 if classes are ignored.
      calibrated_scores = calibration_fn(class_predictions_with_background)
      return calibrated_scores
    calibrated_scores_np = self.execute(graph_fn, [])
    self.assertAllClose(calibrated_scores_np, [[[0.05, 0.1, 0.15],
                                                [0.2, 0.25, 0.0]],
                                               [[0.35, 0.45, 0.55],

...@@ -161,8 +165,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
    self._add_function_approximation_to_calibration_proto(
        calibration_config, class_1_x, class_1_y, class_id=1)

    def graph_fn():
      calibration_fn = calibration_builder.build(calibration_config)
      # batch_size = 2, num_classes = 2, num_anchors = 2.
      class_predictions_with_background = tf.constant(

...@@ -170,7 +173,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
          [[0.6, 0.4], [0.08, 0.92]]],
          dtype=tf.float32)
      calibrated_scores = calibration_fn(class_predictions_with_background)
      return calibrated_scores
    calibrated_scores_np = self.execute(graph_fn, [])
    self.assertAllClose(calibrated_scores_np, [[[0.5, 0.6], [0.5, 0.3]],
                                               [[0.5, 0.7], [0.5, 0.96]]])
...@@ -179,8 +183,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
    calibration_config = calibration_pb2.CalibrationConfig()
    calibration_config.temperature_scaling_calibration.scaler = 2.0

    def graph_fn():
      calibration_fn = calibration_builder.build(calibration_config)
      # batch_size = 2, num_classes = 2, num_anchors = 2.
      class_predictions_with_background = tf.constant(

...@@ -188,7 +191,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
          [[0.6, 0.7, 0.8], [0.9, 1.0, 1.0]]],
          dtype=tf.float32)
      calibrated_scores = calibration_fn(class_predictions_with_background)
      return calibrated_scores
    calibrated_scores_np = self.execute(graph_fn, [])
    self.assertAllClose(calibrated_scores_np,
                        [[[0.05, 0.1, 0.15], [0.2, 0.25, 0.0]],
                         [[0.3, 0.35, 0.4], [0.45, 0.5, 0.5]]])

...@@ -212,8 +216,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
    calibration_config = calibration_pb2.CalibrationConfig()
    self._add_function_approximation_to_calibration_proto(
        calibration_config, class_0_x, class_0_y, class_id=0)

    def graph_fn():
      calibration_fn = calibration_builder.build(calibration_config)
      # batch_size = 2, num_classes = 2, num_anchors = 2.
      class_predictions_with_background = tf.constant(

...@@ -221,7 +224,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
          [[0.6, 0.4], [0.08, 0.92]]],
          dtype=tf.float32)
      calibrated_scores = calibration_fn(class_predictions_with_background)
      return calibrated_scores
    calibrated_scores_np = self.execute(graph_fn, [])
    self.assertAllClose(calibrated_scores_np, [[[0.5, 0.2], [0.5, 0.1]],
                                               [[0.5, 0.4], [0.5, 0.92]]])
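The recurring change in the test file above is replacing `with
self.test_session()` blocks by a local `graph_fn` handed to
`self.execute(graph_fn, [])`. A minimal sketch of what such an execute-style
helper does in TF1 graph mode, assuming numpy array inputs (the real
`object_detection.utils.test_case` helper additionally handles TF2 eager
execution and device placement, so this is only an approximation):

```python
import tensorflow.compat.v1 as tf

def execute_in_graph_mode(graph_fn, inputs):
  """Builds graph_fn in a fresh graph, feeds numpy `inputs`, returns numpy outputs."""
  with tf.Graph().as_default():
    placeholders = [
        tf.placeholder(tf.as_dtype(arr.dtype), shape=arr.shape)
        for arr in inputs
    ]
    outputs = graph_fn(*placeholders)
    with tf.Session() as sess:
      return sess.run(outputs, feed_dict=dict(zip(placeholders, inputs)))
```

With an empty input list, `graph_fn` builds everything from constants, which is
what the `self.execute(graph_fn, [])` calls in these tests do.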
......
...@@ -29,7 +29,6 @@ from __future__ import print_function
import functools
import tensorflow.compat.v1 as tf

from object_detection.builders import decoder_builder
from object_detection.protos import input_reader_pb2

...@@ -94,7 +93,7 @@ def read_dataset(file_read_func, input_files, config,
    filename_dataset = filename_dataset.repeat(config.num_epochs or None)
  records_dataset = filename_dataset.apply(
      tf.data.experimental.parallel_interleave(
          file_read_func,
          cycle_length=num_readers,
          block_length=config.read_block_length,

...@@ -153,6 +152,30 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
    if not config.input_path:
      raise ValueError('At least one input path must be specified in '
                       '`input_reader_config`.')

    def dataset_map_fn(dataset, fn_to_map, batch_size=None,
                       input_reader_config=None):
      """Handles whether or not to use the legacy map function.

      Args:
        dataset: A tf.Dataset.
        fn_to_map: The function to be mapped for that dataset.
        batch_size: Batch size. If batch size is None, no batching is performed.
        input_reader_config: A input_reader_pb2.InputReader object.

      Returns:
        A tf.data.Dataset mapped with fn_to_map.
      """
      if hasattr(dataset, 'map_with_legacy_function'):
        if batch_size:
          num_parallel_calls = batch_size * (
              input_reader_config.num_parallel_batches)
        else:
          num_parallel_calls = input_reader_config.num_parallel_map_calls
        dataset = dataset.map_with_legacy_function(
            fn_to_map, num_parallel_calls=num_parallel_calls)
      else:
        dataset = dataset.map(fn_to_map, tf.data.experimental.AUTOTUNE)
      return dataset

    shard_fn = shard_function_for_context(input_context)
    if input_context is not None:
      batch_size = input_context.get_per_replica_batch_size(batch_size)

...@@ -163,15 +186,16 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
      dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0)
    # TODO(rathodv): make batch size a required argument once the old binaries
    # are deleted.
    dataset = dataset_map_fn(dataset, decoder.decode, batch_size,
                             input_reader_config)
    if reduce_to_frame_fn:
      dataset = reduce_to_frame_fn(dataset, dataset_map_fn, batch_size,
                                   input_reader_config)
    if transform_input_data_fn is not None:
      dataset = dataset_map_fn(dataset, transform_input_data_fn,
                               batch_size, input_reader_config)
    if batch_size:
      dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(input_reader_config.num_prefetch_batches)
    return dataset
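Two of the replacements above are mechanical tf.data migrations:
`tf.contrib.data.parallel_interleave` becomes
`tf.data.experimental.parallel_interleave`, and
`tf.contrib.data.batch_and_drop_remainder(batch_size)` becomes a plain
`batch(batch_size, drop_remainder=True)`. A toy sketch (the file names and the
reader function are made up for illustration):

```python
import tensorflow.compat.v1 as tf

def toy_reader(filename):
  # Stand-in for file_read_func; a real reader would return a TFRecordDataset.
  return tf.data.Dataset.from_tensors(filename)

filenames = tf.data.Dataset.from_tensor_slices(['a.tfrecord', 'b.tfrecord'])
records = filenames.apply(
    tf.data.experimental.parallel_interleave(
        toy_reader, cycle_length=2, sloppy=True))
# Equivalent to the old tf.contrib.data.batch_and_drop_remainder(2).
batched = records.batch(2, drop_remainder=True)
```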
......
...@@ -197,13 +197,13 @@ class DatasetBuilderTest(test_case.TestCase):
        output_dict[fields.InputDataFields.groundtruth_boxes][0][0])

  def get_mock_reduce_to_frame_fn(self):
    def mock_reduce_to_frame_fn(dataset, dataset_map_fn, batch_size, config):
      def get_frame(tensor_dict):
        out_tensor_dict = {}
        out_tensor_dict[fields.InputDataFields.source_id] = (
            tensor_dict[fields.InputDataFields.source_id][0])
        return out_tensor_dict
      return dataset_map_fn(dataset, get_frame, batch_size, config)
    return mock_reduce_to_frame_fn

  def test_build_tf_record_input_reader_sequence_example_train(self):

...@@ -390,7 +390,7 @@ class DatasetBuilderTest(test_case.TestCase):
      return iter1.get_next(), iter2.get_next()
    output_dict1, output_dict2 = self.execute(graph_fn, [])
    self.assertAllEqual([b'0'], output_dict1[fields.InputDataFields.source_id])
    self.assertEqual([b'1'], output_dict2[fields.InputDataFields.source_id])

  def test_sample_one_of_n_shards(self):
...@@ -537,8 +537,15 @@ class ReadDatasetTest(test_case.TestCase):
    def graph_fn():
      keys = [1, 0, -1]
      dataset = tf.data.Dataset.from_tensor_slices([[1, 2, -1, 5]])
      try:
        # Dynamically try to load the tf v2 lookup, falling back to contrib
        lookup = tf.compat.v2.lookup
        hash_table_class = tf.compat.v2.lookup.StaticHashTable
      except AttributeError:
        lookup = contrib_lookup
        hash_table_class = contrib_lookup.HashTable
      table = hash_table_class(
          initializer=lookup.KeyValueTensorInitializer(
              keys=keys, values=list(reversed(keys))),
          default_value=100)
      dataset = dataset.map(table.lookup)

...@@ -559,7 +566,7 @@ class ReadDatasetTest(test_case.TestCase):
    data = self.execute(graph_fn, [])
    # Note that the execute function extracts single outputs if the return
    # value is of size 1.
    self.assertCountEqual(
        data, [
            1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
            50

...@@ -577,7 +584,7 @@ class ReadDatasetTest(test_case.TestCase):
    data = self.execute(graph_fn, [])
    # Note that the execute function extracts single outputs if the return
    # value is of size 1.
    self.assertCountEqual(
        data, [
            1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
            50

...@@ -607,12 +614,14 @@ class ReadDatasetTest(test_case.TestCase):
    def graph_fn():
      return self._get_dataset_next(
          [self._shuffle_path_template % '*'], config, batch_size=10)
    expected_non_shuffle_output1 = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    expected_non_shuffle_output2 = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
    # Note that the execute function extracts single outputs if the return
    # value is of size 1.
    data = self.execute(graph_fn, [])
    self.assertTrue(all(data == expected_non_shuffle_output1) or
                    all(data == expected_non_shuffle_output2))

  def test_read_dataset_single_epoch(self):
    config = input_reader_pb2.InputReader()
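The try/except above prefers the TF2 lookup API and only falls back to
`contrib_lookup` when `tf.compat.v2.lookup` is unavailable. For reference, a
standalone sketch of the TF2-native form of the same table (assuming eager
execution; the values are `list(reversed(keys))` as in the test):

```python
import tensorflow as tf  # TF2, eager execution assumed

keys = tf.constant([1, 0, -1], dtype=tf.int64)
values = tf.constant([-1, 0, 1], dtype=tf.int64)  # list(reversed(keys))
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys, values), default_value=100)
# Unknown keys (2 and 5) map to the default value.
print(table.lookup(tf.constant([1, 2, -1, 5], dtype=tf.int64)).numpy())
# => [ -1 100   1 100]
```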
......
...@@ -48,7 +48,7 @@ def build(input_reader_config):
  if input_reader_config.HasField('label_map_path'):
    label_map_proto_file = input_reader_config.label_map_path
  input_type = input_reader_config.input_type
  if input_type == input_reader_pb2.InputType.Value('TF_EXAMPLE'):
    decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=input_reader_config.load_instance_masks,
        load_multiclass_scores=input_reader_config.load_multiclass_scores,

...@@ -60,7 +60,7 @@ def build(input_reader_config):
        num_keypoints=input_reader_config.num_keypoints,
        expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy)
    return decoder
  elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'):
    decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
        label_map_proto_file=label_map_proto_file,
        load_context_features=input_reader_config.load_context_features)
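The switch from attribute access (`InputType.TF_EXAMPLE`) to
`InputType.Value('TF_EXAMPLE')` uses the name/number helpers that protobuf's
generated enum wrappers provide; the motivation (portability across protobuf
versions) is an assumption here, but the helpers themselves are standard:

```python
from object_detection.protos import input_reader_pb2

# Value() maps an enum name to its number; Name() is the inverse.
tf_example = input_reader_pb2.InputType.Value('TF_EXAMPLE')
assert input_reader_pb2.InputType.Name(tf_example) == 'TF_EXAMPLE'
```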
......
...@@ -29,6 +29,7 @@ from object_detection.core import standard_fields as fields
from object_detection.dataset_tools import seq_example_util
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util
from object_detection.utils import test_case


def _get_labelmap_path():

...@@ -38,17 +39,20 @@ def _get_labelmap_path():
                      'pet_label_map.pbtxt')


class DecoderBuilderTest(test_case.TestCase):

  def _make_serialized_tf_example(self, has_additional_channels=False):
    image_tensor_np = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    additional_channels_tensor_np = np.random.randint(
        255, size=(4, 5, 1)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]

    def graph_fn(image_tensor):
      encoded_jpeg = tf.image.encode_jpeg(image_tensor)
      return encoded_jpeg

    encoded_jpeg = self.execute_cpu(graph_fn, [image_tensor_np])
    encoded_additional_channels_jpeg = self.execute_cpu(
        graph_fn, [additional_channels_tensor_np])

    features = {
        'image/source_id': dataset_util.bytes_feature('0'.encode()),
        'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
...@@ -71,46 +75,45 @@ class DecoderBuilderTest(tf.test.TestCase):

  def _make_random_serialized_jpeg_images(self, num_frames, image_height,
                                          image_width):
    def graph_fn():
      images = tf.cast(tf.random.uniform(
          [num_frames, image_height, image_width, 3],
          maxval=256,
          dtype=tf.int32), dtype=tf.uint8)
      images_list = tf.unstack(images, axis=0)
      encoded_images = [tf.io.encode_jpeg(image) for image in images_list]
      return encoded_images
    return self.execute_cpu(graph_fn, [])

  def _make_serialized_tf_sequence_example(self):
    num_frames = 4
    image_height = 20
    image_width = 30
    image_source_ids = [str(i) for i in range(num_frames)]
    encoded_images = self._make_random_serialized_jpeg_images(
        num_frames, image_height, image_width)
    sequence_example_serialized = seq_example_util.make_sequence_example(
        dataset_name='video_dataset',
        video_id='video',
        encoded_images=encoded_images,
        image_height=image_height,
        image_width=image_width,
        image_source_ids=image_source_ids,
        image_format='JPEG',
        is_annotated=[[1], [1], [1], [1]],
        bboxes=[
            [[]],  # Frame 0.
            [[0., 0., 1., 1.]],  # Frame 1.
            [[0., 0., 1., 1.],
             [0.1, 0.1, 0.2, 0.2]],  # Frame 2.
            [[]],  # Frame 3.
        ],
        label_strings=[
            [],  # Frame 0.
            ['Abyssinian'],  # Frame 1.
            ['Abyssinian', 'american_bulldog'],  # Frame 2.
            [],  # Frame 3
        ]).SerializeToString()
    return sequence_example_serialized
def test_build_tf_record_input_reader(self): def test_build_tf_record_input_reader(self):
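The pattern above recurs throughout this diff: graph construction moves into a local graph_fn and the test-case helper execute_cpu runs it, replacing the explicit tf.Session / self.test_session usage. The sketch below illustrates that call shape outside the test class with a simplified stand-in for execute_cpu; the real helper on the Object Detection API's test_case.TestCase is more involved (it also covers tf.function wrapping and graph-mode execution), so treat this only as an approximation of the pattern used in these tests.

import numpy as np
import tensorflow as tf


def execute_cpu(graph_fn, inputs):
  # Simplified stand-in: run graph_fn eagerly on CPU with numpy inputs and
  # return numpy outputs. Not the real object_detection test helper.
  with tf.device('/cpu:0'):
    outputs = graph_fn(*[tf.constant(value) for value in inputs])
  if isinstance(outputs, (list, tuple)):
    return [output.numpy() for output in outputs]
  return outputs.numpy()


def graph_fn(image_tensor):
  # Same computation as the encode step above: JPEG-encode a uint8 image.
  return tf.image.encode_jpeg(image_tensor)


image_np = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = execute_cpu(graph_fn, [image_np])  # JPEG bytes for a 4x5 image.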
...@@ -119,21 +122,19 @@ class DecoderBuilderTest(tf.test.TestCase):
     text_format.Parse(input_reader_text_proto, input_reader_proto)
     decoder = decoder_builder.build(input_reader_proto)
-    tensor_dict = decoder.decode(self._make_serialized_tf_example())
-    with tf.train.MonitoredSession() as sess:
-      output_dict = sess.run(tensor_dict)
-    self.assertNotIn(
-        fields.InputDataFields.groundtruth_instance_masks, output_dict)
-    self.assertEqual((4, 5, 3), output_dict[fields.InputDataFields.image].shape)
-    self.assertAllEqual([2],
-                        output_dict[fields.InputDataFields.groundtruth_classes])
-    self.assertEqual(
-        (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
-    self.assertAllEqual(
-        [0.0, 0.0, 1.0, 1.0],
-        output_dict[fields.InputDataFields.groundtruth_boxes][0])
+    serialized_seq_example = self._make_serialized_tf_example()
+    def graph_fn():
+      tensor_dict = decoder.decode(serialized_seq_example)
+      return (tensor_dict[fields.InputDataFields.image],
+              tensor_dict[fields.InputDataFields.groundtruth_classes],
+              tensor_dict[fields.InputDataFields.groundtruth_boxes])
+    (image, groundtruth_classes,
+     groundtruth_boxes) = self.execute_cpu(graph_fn, [])
+    self.assertEqual((4, 5, 3), image.shape)
+    self.assertAllEqual([2], groundtruth_classes)
+    self.assertEqual((1, 4), groundtruth_boxes.shape)
+    self.assertAllEqual([0.0, 0.0, 1.0, 1.0], groundtruth_boxes[0])

   def test_build_tf_record_input_reader_sequence_example(self):
     label_map_path = _get_labelmap_path()
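The sequence-example test below decodes the proto built by seq_example_util.make_sequence_example above. At the proto level that helper ultimately serializes a tf.train.SequenceExample with clip-level context features and per-frame feature lists; the sketch below shows only that general layout, with placeholder frame bytes and key names that are assumptions rather than the keys seq_example_util actually writes.

import tensorflow as tf


def _bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


# Placeholder frame payloads; a real example would hold JPEG bytes per frame.
encoded_frames = [b'frame_0_jpeg', b'frame_1_jpeg']

sequence_example = tf.train.SequenceExample(
    # Clip-level metadata (key names here are illustrative only).
    context=tf.train.Features(feature={
        'clip/dataset_name': _bytes_feature(b'video_dataset'),
        'clip/media_id': _bytes_feature(b'video'),
    }),
    # One feature per frame for each per-frame field.
    feature_lists=tf.train.FeatureLists(feature_list={
        'image/encoded': tf.train.FeatureList(
            feature=[_bytes_feature(frame) for frame in encoded_frames]),
    }))
serialized = sequence_example.SerializeToString()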
...@@ -145,12 +146,16 @@ class DecoderBuilderTest(tf.test.TestCase):
     input_reader_proto.label_map_path = label_map_path
     text_format.Parse(input_reader_text_proto, input_reader_proto)
-    decoder = decoder_builder.build(input_reader_proto)
-    tensor_dict = decoder.decode(self._make_serialized_tf_sequence_example())
-    with tf.train.MonitoredSession() as sess:
-      output_dict = sess.run(tensor_dict)
+    serialized_seq_example = self._make_serialized_tf_sequence_example()
+    def graph_fn():
+      decoder = decoder_builder.build(input_reader_proto)
+      tensor_dict = decoder.decode(serialized_seq_example)
+      return (tensor_dict[fields.InputDataFields.image],
+              tensor_dict[fields.InputDataFields.groundtruth_classes],
+              tensor_dict[fields.InputDataFields.groundtruth_boxes],
+              tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
+    (actual_image, actual_groundtruth_classes, actual_groundtruth_boxes,
+     actual_num_groundtruth_boxes) = self.execute_cpu(graph_fn, [])
     expected_groundtruth_classes = [[-1, -1], [1, -1], [1, 2], [-1, -1]]
     expected_groundtruth_boxes = [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
                                   [[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]],
...@@ -158,19 +163,14 @@ class DecoderBuilderTest(tf.test.TestCase):
                                   [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]
     expected_num_groundtruth_boxes = [0, 1, 2, 0]
-    self.assertNotIn(
-        fields.InputDataFields.groundtruth_instance_masks, output_dict)
     # Sequence example images are encoded.
-    self.assertEqual((4,), output_dict[fields.InputDataFields.image].shape)
+    self.assertEqual((4,), actual_image.shape)
     self.assertAllEqual(expected_groundtruth_classes,
-                        output_dict[fields.InputDataFields.groundtruth_classes])
-    self.assertEqual(
-        (4, 2, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
+                        actual_groundtruth_classes)
     self.assertAllClose(expected_groundtruth_boxes,
-                        output_dict[fields.InputDataFields.groundtruth_boxes])
+                        actual_groundtruth_boxes)
     self.assertAllClose(
-        expected_num_groundtruth_boxes,
-        output_dict[fields.InputDataFields.num_groundtruth_boxes])
+        expected_num_groundtruth_boxes, actual_num_groundtruth_boxes)

   def test_build_tf_record_input_reader_and_load_instance_masks(self):
     input_reader_text_proto = """
...@@ -181,14 +181,12 @@ class DecoderBuilderTest(tf.test.TestCase):
     text_format.Parse(input_reader_text_proto, input_reader_proto)
     decoder = decoder_builder.build(input_reader_proto)
-    tensor_dict = decoder.decode(self._make_serialized_tf_example())
-    with tf.train.MonitoredSession() as sess:
-      output_dict = sess.run(tensor_dict)
-    self.assertAllEqual(
-        (1, 4, 5),
-        output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
+    serialized_seq_example = self._make_serialized_tf_example()
+    def graph_fn():
+      tensor_dict = decoder.decode(serialized_seq_example)
+      return tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+    masks = self.execute_cpu(graph_fn, [])
+    self.assertAllEqual((1, 4, 5), masks.shape)

 if __name__ == '__main__':
...
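For reference, the single-image tests above decode a serialized tf.train.Example whose features dict (only partially visible at the top of this section) carries at least the source id and the JPEG-encoded image. Below is a minimal sketch of building such an example by hand, using raw tf.train.Feature wrappers in place of the dataset_util helpers whose definitions are not part of this diff.

import numpy as np
import tensorflow as tf


def _bytes_feature(value):
  # Assumed to be roughly what dataset_util.bytes_feature wraps.
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


image_np = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_np)).numpy()

example = tf.train.Example(features=tf.train.Features(feature={
    'image/source_id': _bytes_feature('0'.encode()),
    'image/encoded': _bytes_feature(encoded_jpeg),
    # The real test adds further keys (image format, boxes, masks, labels)
    # that are not visible in this excerpt.
}))
serialized_example = example.SerializeToString()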