"include/composable_kernel/utility/config.hpp.in" did not exist on "121693b3d3b3148010f0756c5ab4741476620aba"
Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

parents d8611151 b035a227
@@ -23,6 +23,7 @@ from official.vision.detection.modeling.architecture import heads
from official.vision.detection.modeling.architecture import identity
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.modeling.architecture import resnet
from official.vision.detection.modeling.architecture import spinenet
def norm_activation_generator(params):
@@ -42,6 +43,9 @@ def backbone_generator(params):
activation=params.norm_activation.activation,
norm_activation=norm_activation_generator(
params.norm_activation))
elif params.architecture.backbone == 'spinenet':
spinenet_params = params.spinenet
backbone_fn = spinenet.SpineNetBuilder(model_id=spinenet_params.model_id)
else:
raise ValueError('Backbone model `{}` is not supported.'
.format(params.architecture.backbone))
......
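The new `spinenet` branch only forwards `params.spinenet.model_id` to the builder, so the experiment config needs a matching `spinenet` section. A minimal sketch of that lookup, using `SimpleNamespace` as a stand-in for the real attribute-style params object (the stand-in and the chosen model id are illustrative assumptions):

```python
from types import SimpleNamespace

# Hypothetical stand-in for the detection experiment params; the real object
# is built from the YAML/params_dict configs in this codebase.
params = SimpleNamespace(
    architecture=SimpleNamespace(backbone='spinenet'),
    spinenet=SimpleNamespace(model_id='49'),  # any key of SCALING_MAP below
)

if params.architecture.backbone == 'spinenet':
  spinenet_params = params.spinenet
  # Mirrors the new factory branch: only model_id is forwarded.
  print('SpineNetBuilder(model_id=%r)' % spinenet_params.model_id)
```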
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for neural networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualBlock(tf.keras.layers.Layer):
"""A residual block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A residual block with BN after convolutions.
Args:
filters: `int` number of filters for the block's convolutions (and for the
projection shortcut, if used).
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(ResidualBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(ResidualBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(ResidualBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
return self._activation_fn(x + shortcut)
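As a quick sanity check of the block's shape behavior (a sketch with arbitrary shapes): with `use_projection=True` and `strides=2`, the layer halves the spatial dimensions and projects the channel count to `filters`.

```python
import tensorflow as tf
from official.vision.detection.modeling.architecture import nn_blocks

block = nn_blocks.ResidualBlock(filters=64, strides=2, use_projection=True)
x = tf.random.normal([2, 32, 32, 16])  # NHWC; shapes chosen arbitrarily
y = block(x)
print(y.shape)  # (2, 16, 16, 64): stride 2 halves H and W, channels -> filters
```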
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock(tf.keras.layers.Layer):
"""A standard bottleneck block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A standard bottleneck block with BN after convolutions.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(BottleneckBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv3 = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm3 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(BottleneckBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(BottleneckBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
x = self._activation_fn(x)
x = self._conv3(x)
x = self._norm3(x)
return self._activation_fn(x + shortcut)
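The same kind of check applies to the bottleneck block; the difference to keep in mind is the 4x expansion of the final 1x1 convolution (again a sketch with arbitrary shapes):

```python
import tensorflow as tf
from official.vision.detection.modeling.architecture import nn_blocks

block = nn_blocks.BottleneckBlock(filters=64, strides=1, use_projection=True)
x = tf.random.normal([2, 16, 16, 128])  # NHWC; shapes chosen arbitrarily
y = block(x)
print(y.shape)  # (2, 16, 16, 256): output channels are filters * 4
```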
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
import math
from absl import logging
import tensorflow as tf
from tensorflow.python.keras import backend
from official.modeling import tf_utils
from official.vision.detection.modeling.architecture import nn_blocks
layers = tf.keras.layers
FILTER_SIZE_MAP = {
1: 32,
2: 64,
3: 128,
4: 256,
5: 256,
6: 256,
7: 256,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
(2, 'bottleneck', (0, 1), False),
(4, 'residual', (0, 1), False),
(3, 'bottleneck', (2, 3), False),
(4, 'bottleneck', (2, 4), False),
(6, 'residual', (3, 5), False),
(4, 'bottleneck', (3, 5), False),
(5, 'residual', (6, 7), False),
(7, 'residual', (6, 8), False),
(5, 'bottleneck', (8, 9), False),
(5, 'bottleneck', (8, 10), False),
(4, 'bottleneck', (5, 10), True),
(3, 'bottleneck', (4, 10), True),
(5, 'bottleneck', (7, 12), True),
(7, 'bottleneck', (5, 14), True),
(6, 'bottleneck', (12, 14), True),
]
SCALING_MAP = {
'49S': {
'endpoints_num_filters': 128,
'filter_size_scale': 0.65,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'49': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'96': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 2,
},
'143': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 1.0,
'block_repeats': 3,
},
'190': {
'endpoints_num_filters': 512,
'filter_size_scale': 1.3,
'resample_alpha': 1.0,
'block_repeats': 4,
},
}
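As a concrete reading of the scaling table: the '49S' variant shrinks every `FILTER_SIZE_MAP` entry by 0.65 and projects all endpoints to 128 channels, so a level-4 block ends up with int(256 * 0.65) = 166 filters. A small illustrative lookup:

```python
scaling = SCALING_MAP['49S']
level4_filters = int(FILTER_SIZE_MAP[4] * scaling['filter_size_scale'])
print(level4_filters, scaling['endpoints_num_filters'])  # -> 166 128
```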
class BlockSpec(object):
"""A container class that specifies the block configuration for SpineNet."""
def __init__(self, level, block_fn, input_offsets, is_output):
self.level = level
self.block_fn = block_fn
self.input_offsets = input_offsets
self.is_output = is_output
def build_block_specs(block_specs=None):
"""Builds the list of BlockSpec objects for SpineNet."""
if not block_specs:
block_specs = SPINENET_BLOCK_SPECS
logging.info('Building SpineNet block specs: %s', block_specs)
return [BlockSpec(*b) for b in block_specs]
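Following the tuple layout documented above `SPINENET_BLOCK_SPECS`, the first default spec describes a level-2 bottleneck block fed by the two stem blocks (input offsets 0 and 1) that is not an output block; `build_block_specs()` simply wraps each tuple in a `BlockSpec`:

```python
specs = build_block_specs()
first = specs[0]
print(first.level, first.block_fn, first.input_offsets, first.is_output)
# -> 2 bottleneck (0, 1) False
```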
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNet(tf.keras.Model):
"""Class to build SpineNet models."""
def __init__(self,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
endpoints_num_filters=256,
resample_alpha=0.5,
block_repeats=1,
filter_size_scale=1.0,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""SpineNet model."""
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = endpoints_num_filters
self._resample_alpha = resample_alpha
self._block_repeats = block_repeats
self._filter_size_scale = filter_size_scale
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation = tf.nn.relu
elif activation == 'swish':
self._activation = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._init_block_fn = 'bottleneck'
self._num_init_blocks = 2
if use_sync_bn:
self._norm = layers.experimental.SyncBatchNormalization
else:
self._norm = layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
# Build SpineNet.
inputs = tf.keras.Input(shape=input_specs.shape[1:])
net = self._build_stem(inputs=inputs)
net = self._build_scale_permuted_network(
net=net, input_width=input_specs.shape[1])
net = self._build_endpoints(net=net)
super(SpineNet, self).__init__(inputs=inputs, outputs=net)
def _block_group(self,
inputs,
filters,
strides,
block_fn_cand,
block_repeats=1,
name='block_group'):
"""Creates one group of blocks for the SpineNet model."""
block_fn_candidates = {
'bottleneck': nn_blocks.BottleneckBlock,
'residual': nn_blocks.ResidualBlock,
}
block_fn = block_fn_candidates[block_fn_cand]
_, _, _, num_filters = inputs.get_shape().as_list()
if block_fn_cand == 'bottleneck':
use_projection = not (num_filters == (filters * 4) and strides == 1)
else:
use_projection = not (num_filters == filters and strides == 1)
x = block_fn(
filters=filters,
strides=strides,
use_projection=use_projection,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
for _ in range(1, block_repeats):
x = block_fn(
filters=filters,
strides=1,
use_projection=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
x)
return tf.identity(x, name=name)
def _build_stem(self, inputs):
"""Build SpineNet stem."""
x = layers.Conv2D(
filters=64,
kernel_size=7,
strides=2,
use_bias=False,
padding='same',
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
net = []
# Build the initial level 2 blocks.
for i in range(self._num_init_blocks):
x = self._block_group(
inputs=x,
filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
strides=1,
block_fn_cand=self._init_block_fn,
block_repeats=self._block_repeats,
name='stem_block_{}'.format(i + 1))
net.append(x)
return net
def _build_scale_permuted_network(self,
net,
input_width,
weighted_fusion=False):
"""Build scale-permuted network."""
net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
net_block_fns = [self._init_block_fn] * len(net)
num_outgoing_connections = [0] * len(net)
endpoints = {}
for i, block_spec in enumerate(self._block_specs):
# Find out specs for the target block.
target_width = int(math.ceil(input_width / 2**block_spec.level))
target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
self._filter_size_scale)
target_block_fn = block_spec.block_fn
# Resample then merge input0 and input1.
parents = []
input0 = block_spec.input_offsets[0]
input1 = block_spec.input_offsets[1]
x0 = self._resample_with_alpha(
inputs=net[input0],
input_width=net_sizes[input0],
input_block_fn=net_block_fns[input0],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x0)
num_outgoing_connections[input0] += 1
x1 = self._resample_with_alpha(
inputs=net[input1],
input_width=net_sizes[input1],
input_block_fn=net_block_fns[input1],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x1)
num_outgoing_connections[input1] += 1
# Merge 0 outdegree blocks to the output block.
if block_spec.is_output:
for j, (j_feat,
j_connections) in enumerate(zip(net, num_outgoing_connections)):
if j_connections == 0 and (j_feat.shape[2] == target_width and
j_feat.shape[3] == x0.shape[3]):
parents.append(j_feat)
num_outgoing_connections[j] += 1
# pylint: disable=g-direct-tensorflow-import
if weighted_fusion:
dtype = parents[0].dtype
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation)(tf.add_n(parents))
x = self._block_group(
inputs=x,
filters=target_num_filters,
strides=1,
block_fn_cand=target_block_fn,
block_repeats=self._block_repeats,
name='scale_permuted_block_{}'.format(i + 1))
net.append(x)
net_sizes.append(target_width)
net_block_fns.append(target_block_fn)
num_outgoing_connections.append(0)
# Save output feats.
if block_spec.is_output:
if block_spec.level in endpoints:
raise ValueError('Duplicate feats found for output level {}.'.format(
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
endpoints[block_spec.level] = x
return endpoints
def _build_endpoints(self, net):
"""Match filter size for endpoints before sharing conv layers."""
endpoints = {}
for level in range(self._min_level, self._max_level + 1):
x = layers.Conv2D(
filters=self._endpoints_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
net[level])
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
endpoints[level] = x
return endpoints
def _resample_with_alpha(self,
inputs,
input_width,
input_block_fn,
target_width,
target_num_filters,
target_block_fn,
alpha=0.5):
"""Match resolution and feature dimension."""
_, _, _, input_num_filters = inputs.get_shape().as_list()
if input_block_fn == 'bottleneck':
input_num_filters /= 4
new_num_filters = int(input_num_filters * alpha)
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
# Spatial resampling.
if input_width > target_width:
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=3,
strides=2,
padding='SAME',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
input_width /= 2
while input_width > target_width:
x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = layers.UpSampling2D(size=(scale, scale))(x)
# Last 1x1 conv to match filter size.
if target_block_fn == 'bottleneck':
target_num_filters *= 4
x = layers.Conv2D(
filters=target_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
return x
class SpineNetBuilder(object):
"""SpineNet builder."""
def __init__(self,
model_id,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001):
if model_id not in SCALING_MAP:
raise ValueError(
'SpineNet {} is not a valid architecture.'.format(model_id))
scaling_params = SCALING_MAP[model_id]
self._input_specs = input_specs
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = scaling_params['endpoints_num_filters']
self._resample_alpha = scaling_params['resample_alpha']
self._block_repeats = scaling_params['block_repeats']
self._filter_size_scale = scaling_params['filter_size_scale']
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._activation = activation
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
def __call__(self, inputs, is_training=None):
with backend.get_graph().as_default():
model = SpineNet(
input_specs=self._input_specs,
min_level=self._min_level,
max_level=self._max_level,
block_specs=self._block_specs,
endpoints_num_filters=self._endpoints_num_filters,
resample_alpha=self._resample_alpha,
block_repeats=self._block_repeats,
filter_size_scale=self._filter_size_scale,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)
return model(inputs)
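A sketch of using the builder end to end (the model id and input size are illustrative choices): the call returns a dict of feature maps keyed by level, here levels 3 through 7 for the default 640x640 input.

```python
import tensorflow as tf
from official.vision.detection.modeling.architecture import spinenet

builder = spinenet.SpineNetBuilder(model_id='49')  # any key of SCALING_MAP
images = tf.keras.Input(shape=(640, 640, 3))
endpoints = builder(images, is_training=False)
for level in sorted(endpoints):
  print(level, endpoints[level].shape)
# Expected: levels 3..7 at strides 8..128, 256 channels each,
# e.g. 3 -> (None, 80, 80, 256), 7 -> (None, 5, 5, 256).
```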
@@ -119,6 +119,24 @@ python3 classifier_trainer.py \
--params_override='runtime.num_gpus=$NUM_GPUS'
```
To train on multiple hosts, each with GPUs attached, using
[MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy),
update the `runtime` section in gpu.yaml
(or override it using `--params_override`) with:
```YAML
# gpu.yaml
runtime:
distribution_strategy: 'multi_worker_mirrored'
worker_hosts: '$HOST1:port,$HOST2:port'
num_gpus: $NUM_GPUS
task_index: 0
```
Set `task_index: 0` on the first host, `task_index: 1` on the second host, and
so on. `$HOST1` and `$HOST2` are the IP addresses of the hosts, and `port` can
be any free port on the hosts. Only the first host will write TensorBoard
summaries and save checkpoints.
#### On TPU:
```bash
python3 classifier_trainer.py \
......
@@ -235,9 +235,6 @@ def initialize(params: base_configs.ExperimentConfig,
else:
data_format = 'channels_last'
tf.keras.backend.set_image_data_format(data_format)
distribution_utils.configure_cluster(
params.runtime.worker_hosts,
params.runtime.task_index)
if params.runtime.run_eagerly:
# Enable eager execution to allow step-by-step debugging
tf.config.experimental_run_functions_eagerly(True)
@@ -296,6 +293,10 @@ def train_and_eval(
"""Runs the train and eval path using compile/fit."""
logging.info('Running train and eval.')
distribution_utils.configure_cluster(
params.runtime.worker_hosts,
params.runtime.task_index)
# Note: for TPUs, strategy and scope should be created before the dataset
strategy = strategy_override or distribution_utils.get_distribution_strategy(
distribution_strategy=params.runtime.distribution_strategy,
@@ -338,7 +339,8 @@ def train_and_eval(
optimizer = optimizer_factory.build_optimizer(
optimizer_name=params.model.optimizer.name,
base_learning_rate=learning_rate,
params=params.model.optimizer.as_dict(),
model=model)
metrics_map = _get_metrics(one_hot)
metrics = [metrics_map[metric] for metric in params.train.metrics]
......
@@ -100,6 +100,9 @@ class DatasetConfig(base_config.Config):
skip_decoding: Whether to skip image decoding when loading from TFDS.
cache: whether to cache dataset examples. Can be used to avoid re-reading
from disk on the second epoch. Requires significant memory overhead.
tf_data_service: The URI of a tf.data service to offload preprocessing onto
during training. The URI should be in the format "protocol://address",
e.g. "grpc://tf-data-service:5050".
mean_subtract: whether or not to apply mean subtraction to the dataset.
standardize: whether or not to apply standardization to the dataset.
"""
@@ -123,6 +126,7 @@ class DatasetConfig(base_config.Config):
file_shuffle_buffer_size: int = 1024
skip_decoding: bool = True
cache: bool = False
tf_data_service: Optional[str] = None
mean_subtract: bool = False
standardize: bool = False
@@ -449,6 +453,18 @@ class DatasetBuilder:
# Prefetch overlaps in-feed with training
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
if self.config.tf_data_service:
if not hasattr(tf.data.experimental, 'service'):
raise ValueError('The tf_data_service flag requires Tensorflow version '
'>= 2.3.0, but the version is {}'.format(
tf.__version__))
dataset = dataset.apply(
tf.data.experimental.service.distribute(
processing_mode='parallel_epochs',
service=self.config.tf_data_service,
job_name='resnet_train'))
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
return dataset
def parse_record(self, record: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
......
@@ -18,11 +18,12 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
from typing import Any, Dict, Text, List
from absl import logging
import tensorflow as tf
import tensorflow_addons as tfa
from typing import Any, Dict, Text, List
from official.vision.image_classification import learning_rate
from official.vision.image_classification.configs import base_configs
@@ -250,7 +251,8 @@ class MovingAverage(tf.keras.optimizers.Optimizer):
def build_optimizer(
optimizer_name: Text,
base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
params: Dict[Text, Any],
model: tf.keras.Model = None):
"""Build the optimizer based on name. """Build the optimizer based on name.
Args: Args:
...@@ -261,6 +263,8 @@ def build_optimizer( ...@@ -261,6 +263,8 @@ def build_optimizer(
params: String -> Any dictionary representing the optimizer params. params: String -> Any dictionary representing the optimizer params.
This should contain optimizer specific parameters such as This should contain optimizer specific parameters such as
`base_learning_rate`, `decay`, etc. `base_learning_rate`, `decay`, etc.
model: The `tf.keras.Model`. This is used for the shadow copy if using
`MovingAverage`.
Returns:
A tf.keras.Optimizer.
@@ -322,10 +326,13 @@ def build_optimizer(
# Moving average should be applied last, as it's applied at test time
moving_average_decay = params.get('moving_average_decay', 0.)
if moving_average_decay is not None and moving_average_decay > 0.:
if model is None:
raise ValueError('`model` must be provided if using `MovingAverage`.')
logging.info('Including moving average decay.')
optimizer = MovingAverage(
optimizer=optimizer,
average_decay=moving_average_decay)
optimizer.shadow_copy(model)
return optimizer
......
@@ -19,15 +19,21 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import tensorflow as tf
from absl.testing import parameterized
import tensorflow as tf
from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs
class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
def build_toy_model(self) -> tf.keras.Model:
"""Creates a toy `tf.Keras.Model`."""
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=(1,)))
return model
@parameterized.named_parameters(
('sgd', 'sgd', 0., False),
('momentum', 'momentum', 0., False),
@@ -40,6 +46,7 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
('rmsprop_ema', 'rmsprop', 0.999, False))
def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
"""Smoke test to be sure no syntax errors."""
model = self.build_toy_model()
params = {
'learning_rate': 0.001,
'rho': 0.09,
@@ -51,7 +58,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
optimizer = optimizer_factory.build_optimizer(
optimizer_name=optimizer_name,
base_learning_rate=params['learning_rate'],
params=params,
model=model)
self.assertTrue(issubclass(type(optimizer), tf.keras.optimizers.Optimizer))
def test_unknown_optimizer(self):
......
@@ -20,49 +20,49 @@ The research models are maintained by their respective authors.
| Directory | Name | Description | Maintainer(s) |
|-----------|------|-------------|---------------|
| [object_detection](object_detection) | TensorFlow Object Detection API | A framework that makes it easy to construct, train and deploy object detection models<br /><br />A collection of object detection models pre-trained on the COCO dataset, the Kitti dataset, the Open Images dataset, the AVA v2.1 dataset, and the iNaturalist Species Detection Dataset| jch1, tombstone, pkulzc |
| [slim](slim) | TensorFlow-Slim Image Classification Model Library | A lightweight high-level API of TensorFlow for defining, training and evaluating image classification models <br />• Inception V1/V2/V3/V4<br />• Inception-ResNet-v2<br />• ResNet V1/V2<br />• VGG 16/19<br />• MobileNet V1/V2/V3<br />• NASNet-A_Mobile/Large<br />• PNASNet-5_Large/Mobile | sguada, marksandler2 |
## Models and Implementations
### Computer Vision
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [attention_ocr](attention_ocr) | [Attention-based Extraction of Structured Information from Street View Imagery](https://arxiv.org/abs/1704.03549) | ICDAR 2017 | xavigibert |
| [autoaugment](autoaugment) | [1] [AutoAugment](https://arxiv.org/abs/1805.09501)<br />[2] [Wide Residual Networks](https://arxiv.org/abs/1605.07146)<br />[3] [Shake-Shake regularization](https://arxiv.org/abs/1705.07485)<br />[4] [ShakeDrop Regularization for Deep Residual Learning](https://arxiv.org/abs/1802.02375) | [1] CVPR 2019<br />[2] BMVC 2016<br /> [3] ICLR 2017<br /> [4] ICLR 2018 | barretzoph |
| [deeplab](deeplab) | [1] [DeepLabv1: Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected CRFs](https://arxiv.org/abs/1412.7062)<br />[2] [DeepLabv2: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs](https://arxiv.org/abs/1606.00915)<br />[3] [DeepLabv3: Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587)<br />[4] [DeepLabv3+: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611)<br />| [1] ICLR 2015 <br />[2] TPAMI 2017 <br />[4] ECCV 2018 | aquariusjay, yknzhu |
| [delf](delf) | [1] DELF (DEep Local Features): [Large-Scale Image Retrieval with Attentive Deep Local Features](https://arxiv.org/abs/1612.06321)<br />[2] [Detect-to-Retrieve: Efficient Regional Aggregation for Image Search](https://arxiv.org/abs/1812.01584)<br />[3] DELG (DEep Local and Global features): [Unifying Deep Local and Global Features for Image Search](https://arxiv.org/abs/2001.05027)<br />[4] GLDv2: [Google Landmarks Dataset v2 -- A Large-Scale Benchmark for Instance-Level Recognition and Retrieval](https://arxiv.org/abs/2004.01804) | [1] ICCV 2017<br />[2] CVPR 2019<br />[4] CVPR 2020 | andrefaraujo |
| [lstm_object_detection](lstm_object_detection) | [Mobile Video Object Detection with Temporally-Aware Feature Maps](https://arxiv.org/abs/1711.06368) | CVPR 2018 | yinxiaoli, yongzhe2160, lzyuan |
| [marco](marco) | MARCO: [Classification of crystallization outcomes using deep convolutional neural networks](https://arxiv.org/abs/1803.10342) | | vincentvanhoucke |
| [vid2depth](vid2depth) | [Unsupervised Learning of Depth and Ego-Motion from Monocular Video Using 3D Geometric Constraints](https://arxiv.org/abs/1802.05522) | CVPR 2018 | rezama |
### Natural Language Processing
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [adversarial_text](adversarial_text) | [1] [Adversarial Training Methods for Semi-Supervised Text](https://arxiv.org/abs/1605.07725) Classification<br />[2] [Semi-supervised Sequence Learning](https://arxiv.org/abs/1511.01432) | [1] ICLR 2017<br />[2] NIPS 2015 | rsepassi, a-dai |
| [cvt_text](cvt_text) | [Semi-Supervised Sequence Modeling with Cross-View Training](https://arxiv.org/abs/1809.08370) | EMNLP 2018 | clarkkev, lmthang |
### Audio and Speech
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [audioset](audioset) | [1] [Audio Set: An ontology and human-labeled dataset for audio events](https://research.google/pubs/pub45857/)<br />[2] [CNN Architectures for Large-Scale Audio Classification](https://research.google/pubs/pub45611/) | ICASSP 2017 | plakal, dpwe |
### Reinforcement Learning
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [efficient-hrl](efficient-hrl) | [1] [Data-Efficient Hierarchical Reinforcement Learning](https://arxiv.org/abs/1805.08296)<br />[2] [Near-Optimal Representation Learning for Hierarchical Reinforcement Learning](https://arxiv.org/abs/1810.01257) | [1] NIPS 2018<br /> [2] ICLR 2019 | ofirnachum |
| [pcl_rl](pcl_rl) | [1] [Improving Policy Gradient by Exploring Under-appreciated Rewards](https://arxiv.org/abs/1611.09321)<br />[2] [Bridging the Gap Between Value and Policy Based Reinforcement Learning](https://arxiv.org/abs/1702.08892)<br />[3] [Trust-PCL: An Off-Policy Trust Region Method for Continuous Control](https://arxiv.org/abs/1707.01891) | [1] ICLR 2017<br />[2] NIPS 2017<br />[3] ICLR 2018 | ofirnachum |
### Others
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [lfads](lfads) | [LFADS - Latent Factor Analysis via Dynamical Systems](https://arxiv.org/abs/1608.06315) | | jazcollins, sussillo |
| [rebar](rebar) | [REBAR: Low-variance, unbiased gradient estimates for discrete latent variable models](https://arxiv.org/abs/1703.07370) | NIPS 2017 | gjtucker |
---
@@ -70,55 +70,55 @@ The research models are maintained by their respective authors.
The following research models are no longer maintained.
**Note**: We will remove archived models from the master branch in June, 2020.
After removal, you will still be able to access archived models in the archive branch.
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [adv_imagenet_models](adv_imagenet_models) | [1] [Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)<br />[2] [Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204) | [1] ICLR 2017<br /> [2] ICLR 2018 | alexeykurakin |
| [adversarial_crypto](adversarial_crypto) | [Learning to Protect Communications with Adversarial Neural Cryptography](https://arxiv.org/abs/1610.06918) | | dave-andersen |
| [adversarial_logit_pairing](adversarial_logit_pairing) | [Adversarial Logit Pairing](https://arxiv.org/abs/1803.06373) | | alexeykurakin |
| [autoencoder](autoencoder) | Various autoencoders | | snurkabill |
| [brain_coder](brain_coder) | [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526) | | danabo, mnorouzi |
| [cognitive_mapping_and_planning](cognitive_mapping_and_planning) | [Cognitive Mapping and Planning for Visual Navigation](https://arxiv.org/abs/1702.03920) | CVPR 2017 | s-gupta |
| [compression](compression) | [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148) | CVPR 2017 | nmjohn |
| [deep_contextual_bandits](deep_contextual_bandits) | [Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks for Thompson Sampling](https://arxiv.org/abs/1802.09127) | ICLR 2018 | rikel |
| [deep_speech](deep_speech) | [Deep Speech 2](https://arxiv.org/abs/1512.02595) | ICLR 2016 | yhliang2018 |
| [domain_adaptation](domain_adaptation) | [1] [Domain Separation Networks](https://arxiv.org/abs/1608.06019) <br />[2] [Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial Networks](https://arxiv.org/abs/1612.05424) | NIPS 2016 | bousmalis, dmrd |
| [feelvos](feelvos)| [FEELVOS](https://arxiv.org/abs/1902.09513) | CVPR 2019 | pvoigtlaender, yuningchai, aquariusjay |
| [fivo](fivo)| [Filtering variational objectives for training generative sequence models](https://arxiv.org/abs/1705.09279) | NIPS 2017 | dieterichlawson |
| [global_objectives](global_objectives) | [Scalable Learning of Non-Decomposable Objectives](https://arxiv.org/abs/1608.04802) | AISTATS 2017 | mackeya-google |
| [im2txt](im2txt) | [Show and Tell: Lessons learned from the 2015 MSCOCO Image Captioning Challenge](https://arxiv.org/abs/1609.06647) | TPAMI 2016 | cshallue |
| [inception](inception) | [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | CVPR 2016 | shlens, vincentvanhoucke |
| [keypointnet](keypointnet) | [KeypointNet](https://arxiv.org/abs/1807.03146) | | mnorouzi |
| [learned_optimizer](learned_optimizer) | [Learned Optimizers that Scale and Generalize](https://arxiv.org/abs/1703.04813) | ICML 2017 | olganw, nirum |
| [learning_to_remember_rare_events](learning_to_remember_rare_events) | [Learning to Remember Rare Events](https://arxiv.org/abs/1703.03129) | ICLR 2017| lukaszkaiser, ofirnachum |
| [learning_unsupervised_learning](learning_unsupervised_learning) | [Meta-Learning Update Rules for Unsupervised Representation Learning](https://arxiv.org/abs/1804.00222) | ICLR 2019 | lukemetz, nirum |
| [lexnet_nc](lexnet_nc) | [Olive Oil is Made of Olives, Baby Oil is Made for Babies: Interpreting Noun Compounds using Paraphrases in a Neural Model](https://arxiv.org/abs/1803.08073) | NAACL 2018 | vered1986, waterson |
| [lm_1b](lm_1b) | [Exploring the Limits of Language Modeling](https://arxiv.org/abs/1602.02410) | | oriolvinyals, panyx0718 |
| [lm_commonsense](lm_commonsense) | [A Simple Method for Commonsense Reasoning](https://arxiv.org/abs/1806.02847) | | thtrieu |
| [maskgan](maskgan)| [MaskGAN: Better Text Generation via Filling in the______](https://arxiv.org/abs/1801.07736) | ICLR 2018 | liamb315, a-dai |
| [namignizer](namignizer)| Namignizer | | knathanieltucker |
| [neural_gpu](neural_gpu)| [Neural GPUs Learn Algorithms](https://arxiv.org/abs/1511.08228) | | lukaszkaiser |
| [neural_programmer](neural_programmer) | [Learning a Natural Language Interface with Neural Programmer](https://arxiv.org/abs/1611.08945) | ICLR 2017 | arvind2505 |
| [next_frame_prediction](next_frame_prediction) | [Visual Dynamics: Probabilistic Future Frame Synthesis via Cross Convolutional Networks](https://arxiv.org/abs/1607.02586) | NIPS 2016 | panyx0718 |
| [ptn](ptn) | [Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision](https://arxiv.org/abs/1612.00814) | NIPS 2016 | xcyan, arkanath, hellojas, honglaklee |
| [qa_kg](qa_kg) | [Learning to Reason: End-to-End Module Networks for Visual Question Answering](https://arxiv.org/abs/1704.05526) | ICCV 2017 | yuyuz |
| [real_nvp](real_nvp) | [Density estimation using Real NVP](https://arxiv.org/abs/1605.08803) | ICLR 2017 | laurent-dinh |
| [sentiment_analysis](sentiment_analysis)| [Effective Use of Word Order for Text Categorization with Convolutional Neural Networks](https://arxiv.org/abs/1412.1058) | NAACL HLT 2015 | sculd |
| [seq2species](seq2species) | [Seq2Species: A deep learning approach to pattern recognition for short DNA sequences](https://doi.org/10.1101/353474) | | apbusia, depristo |
| [skip_thoughts](skip_thoughts) | [Skip-Thought Vectors](https://arxiv.org/abs/1506.06726) | | cshallue |
| [steve](steve) | [Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion](https://arxiv.org/abs/1807.01675) | NeurIPS 2018 | buckman-google |
| [street](street) | [End-to-End Interpretation of the French Street Name Signs Dataset](https://arxiv.org/abs/1702.03970) | ECCV 2016 | theraysmith |
| [struct2depth](struct2depth)| [Depth Prediction Without the Sensors: Leveraging Structure for Unsupervised Learning from Monocular Videos](https://arxiv.org/abs/1811.06152) | AAAI 2019 | aneliaangelova |
| [swivel](swivel) | [Swivel: Improving Embeddings by Noticing What's Missing](https://arxiv.org/abs/1602.02215) | | waterson |
| [tcn](tcn) | [Time-Contrastive Networks: Self-Supervised Learning from Video](https://arxiv.org/abs/1704.06888) | ICRA 2018 | coreylynch, sermanet |
| [textsum](textsum)| [A Neural Attention Model for Abstractive Sentence Summarization](https://arxiv.org/abs/1509.00685) | EMNLP 2015 | panyx0718, peterjliu |
| [transformer](transformer) | [Spatial Transformer Network](https://arxiv.org/abs/1506.02025) | NIPS 2015 | daviddao|
| [video_prediction](video_prediction) | [Unsupervised Learning for Physical Interaction through Video Prediction](https://arxiv.org/abs/1605.07157) | NIPS 2016 | cbfinn |
---
## Contributions
If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
...@@ -28,8 +28,6 @@ import data.dataset as dataset
import decoder
import deep_speech_model
from official.utils.flags import core as flags_core
from official.utils.logs import hooks_helper
from official.utils.logs import logger
from official.utils.misc import distribution_utils
from official.utils.misc import model_helpers
...@@ -276,16 +274,6 @@ def run_deep_speech(_):
"use_bias": flags_obj.use_bias
}
dataset_name = "LibriSpeech"
benchmark_logger = logger.get_benchmark_logger()
benchmark_logger.log_run_info("deep_speech", dataset_name, run_params,
test_id=flags_obj.benchmark_test_id)
train_hooks = hooks_helper.get_train_hooks(
flags_obj.hooks,
model_dir=flags_obj.model_dir,
batch_size=flags_obj.batch_size)
per_replica_batch_size = per_device_batch_size(flags_obj.batch_size, num_gpus)
def input_fn_train():
...@@ -307,7 +295,7 @@ def run_deep_speech(_):
train_speech_dataset.entries, cycle_index, flags_obj.sortagrad,
flags_obj.batch_size)
estimator.train(input_fn=input_fn_train, hooks=train_hooks)
estimator.train(input_fn=input_fn_train)
# Evaluation
tf.logging.info("Starting to evaluate...")
...@@ -433,8 +421,7 @@ def define_deep_speech_flags():
def main(_):
with logger.benchmark_context(flags_obj):
run_deep_speech(flags_obj)
if __name__ == "__main__":
...
...@@ -25,7 +25,6 @@ from delf.protos import delf_config_pb2
from delf.protos import feature_pb2
from delf.python import box_io
from delf.python import datum_io
from delf.python import delf_v1
from delf.python import feature_aggregation_extractor
from delf.python import feature_aggregation_similarity
from delf.python import feature_extractor
...
...@@ -86,6 +86,9 @@ message DelfConfig {
// Path to DELF model.
optional string model_path = 1; // Required.
// Whether model has been exported using TF version 2+.
optional bool is_tf2_exported = 10 [default = false];
// Image scales to be used.
repeated float image_scales = 2;
...
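The new `is_tf2_exported` flag can simply be toggled on an existing `DelfConfig`. Below is a minimal, hypothetical sketch (not part of this commit) that parses a text-proto config and sets the field, using only the field names shown in the hunk above; the pbtxt file name is illustrative.

from google.protobuf import text_format
from delf.protos import delf_config_pb2

config = delf_config_pb2.DelfConfig()
# 'delf_config_example.pbtxt' is a placeholder name for an existing config file.
with open('delf_config_example.pbtxt', 'r') as f:
  text_format.Parse(f.read(), config)
# Mark the SavedModel as a TF2-style export so downstream code can branch on it.
config.is_tf2_exported = True
print(config.model_path, list(config.image_scales), config.is_tf2_exported)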
...@@ -20,11 +20,14 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import box_io
FLAGS = flags.FLAGS
class BoxesIoTest(tf.test.TestCase):
...@@ -57,8 +60,7 @@ class BoxesIoTest(tf.test.TestCase):
def testWriteAndReadToFile(self):
boxes, scores, class_indices = self._create_data()
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.boxes')
filename = os.path.join(FLAGS.test_tmpdir, 'test.boxes')
box_io.WriteToFile(filename, boxes, scores, class_indices)
data_read = box_io.ReadFromFile(filename)
...@@ -67,8 +69,7 @@ class BoxesIoTest(tf.test.TestCase):
self.assertAllEqual(class_indices, data_read[2])
def testWriteAndReadToFileEmptyFile(self):
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.box')
filename = os.path.join(FLAGS.test_tmpdir, 'test.box')
box_io.WriteToFile(filename, np.array([]), np.array([]), np.array([]))
data_read = box_io.ReadFromFile(filename)
...
...@@ -20,11 +20,14 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import datum_io
FLAGS = flags.FLAGS
class DatumIoTest(tf.test.TestCase):
...@@ -69,8 +72,7 @@ class DatumIoTest(tf.test.TestCase):
def testWriteAndReadToFile(self):
data = np.array([[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
[[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]])
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.datum')
filename = os.path.join(FLAGS.test_tmpdir, 'test.datum')
datum_io.WriteToFile(data, filename)
data_read = datum_io.ReadFromFile(filename)
self.assertAllEqual(data_read, data)
...@@ -84,8 +86,7 @@ class DatumIoTest(tf.test.TestCase):
data_2 = np.array(
[[[255, 0, 5], [10, 300, 0]], [[20, 1, 100], [255, 360, 420]]],
dtype='uint32')
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.datum_pair')
filename = os.path.join(FLAGS.test_tmpdir, 'test.datum_pair')
datum_io.WritePairToFile(data_1, data_2, filename)
data_read_1, data_read_2 = datum_io.ReadPairFromFile(filename)
self.assertAllEqual(data_read_1, data_1)
...
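Both I/O tests above switch from `tf.compat.v1.test.get_temp_dir()` to absl's `FLAGS.test_tmpdir`. A small self-contained sketch of that pattern, with illustrative names only:

import os

from absl import flags
import tensorflow as tf

FLAGS = flags.FLAGS


class ExampleIoTest(tf.test.TestCase):

  def testWriteToTmpdir(self):
    # FLAGS.test_tmpdir is populated by the test runner; no TF1-specific
    # temp-dir helper is needed.
    filename = os.path.join(FLAGS.test_tmpdir, 'example.bin')
    with tf.io.gfile.GFile(filename, 'wb') as f:
      f.write(b'\x00\x01')
    self.assertTrue(tf.io.gfile.exists(filename))


if __name__ == '__main__':
  tf.test.main()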
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DELF model implementation based on the following paper.
Large-Scale Image Retrieval with Attentive Deep Local Features
https://arxiv.org/abs/1612.06321
Please refer to the README.md file for detailed explanations on using the DELF
model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tf_slim import layers
from tf_slim.nets import resnet_v1
from tf_slim.ops.arg_scope import arg_scope
_SUPPORTED_TARGET_LAYER = ['resnet_v1_50/block3', 'resnet_v1_50/block4']
# The variable scope for the attention portion of the model.
_ATTENTION_VARIABLE_SCOPE = 'attention_block'
# The attention_type determines whether the attention based feature aggregation
# is performed on the L2-normalized feature map or on the default feature map
# where L2-normalization is not applied. Note that in both cases, attention
# functions are built on the un-normalized feature map. This is only relevant
# for the training stage.
# Currently supported options are as follows:
# * use_l2_normalized_feature:
# The option use_l2_normalized_feature first applies L2-normalization on the
# feature map and then applies attention based feature aggregation. This
# option is used for the DELF+FT+Att model in the paper.
# * use_default_input_feature:
# The option use_default_input_feature aggregates unnormalized feature map
# directly.
_SUPPORTED_ATTENTION_TYPES = [
'use_l2_normalized_feature', 'use_default_input_feature'
]
# Supported types of non-linearity for the attention score function.
_SUPPORTED_ATTENTION_NONLINEARITY = ['softplus']
class DelfV1(object):
"""Creates a DELF model.
Args:
target_layer_type: The name of target CNN architecture and its layer.
Raises:
ValueError: If an unknown target_layer_type is provided.
"""
def __init__(self, target_layer_type=_SUPPORTED_TARGET_LAYER[0]):
print('Creating model %s ' % target_layer_type)
self._target_layer_type = target_layer_type
if self._target_layer_type not in _SUPPORTED_TARGET_LAYER:
raise ValueError('Unknown model type.')
@property
def target_layer_type(self):
return self._target_layer_type
def _PerformAttention(self,
attention_feature_map,
feature_map,
attention_nonlinear,
kernel=1):
"""Helper function to construct the attention part of the model.
Computes attention score map and aggregates the input feature map based on
the attention score map.
Args:
attention_feature_map: Potentially normalized feature map that will be
aggregated with attention score map.
feature_map: Unnormalized feature map that will be used to compute
attention score map.
attention_nonlinear: Type of non-linearity that will be applied to
attention value.
kernel: Convolutional kernel to use in attention layers (eg: 1, [3, 3]).
Returns:
attention_feat: Aggregated feature vector.
attention_prob: Attention score map after the non-linearity.
attention_score: Attention score map before the non-linearity.
Raises:
ValueError: If unknown attention non-linearity type is provided.
"""
with tf.compat.v1.variable_scope(
'attention', values=[attention_feature_map, feature_map]):
with tf.compat.v1.variable_scope('compute', values=[feature_map]):
activation_fn_conv1 = tf.nn.relu
feature_map_conv1 = layers.conv2d(
feature_map,
512,
kernel,
rate=1,
activation_fn=activation_fn_conv1,
scope='conv1')
attention_score = layers.conv2d(
feature_map_conv1,
1,
kernel,
rate=1,
activation_fn=None,
normalizer_fn=None,
scope='conv2')
# Set activation of conv2 layer of attention model.
with tf.compat.v1.variable_scope(
'merge', values=[attention_feature_map, attention_score]):
if attention_nonlinear not in _SUPPORTED_ATTENTION_NONLINEARITY:
raise ValueError('Unknown attention non-linearity.')
if attention_nonlinear == 'softplus':
with tf.compat.v1.variable_scope(
'softplus_attention',
values=[attention_feature_map, attention_score]):
attention_prob = tf.nn.softplus(attention_score)
attention_feat = tf.reduce_mean(
tf.multiply(attention_feature_map, attention_prob), [1, 2])
attention_feat = tf.expand_dims(tf.expand_dims(attention_feat, 1), 2)
return attention_feat, attention_prob, attention_score
def _GetAttentionSubnetwork(
self,
feature_map,
end_points,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
reuse=False):
"""Constructs the part of the model performing attention.
Args:
feature_map: A tensor of size [batch, height, width, channels]. Usually it
corresponds to the output feature map of a fully-convolutional network.
end_points: Set of activations of the network constructed so far.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
reuse: Whether or not the layer and its variables should be reused.
Returns:
prelogits: A tensor of size [batch, 1, 1, channels].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
end_points: Updated set of activations, for external use.
Raises:
ValueError: If unknown attention_type is provided.
"""
with tf.compat.v1.variable_scope(
_ATTENTION_VARIABLE_SCOPE,
values=[feature_map, end_points],
reuse=reuse):
if attention_type not in _SUPPORTED_ATTENTION_TYPES:
raise ValueError('Unknown attention_type.')
if attention_type == 'use_l2_normalized_feature':
attention_feature_map = tf.nn.l2_normalize(
feature_map, 3, name='l2_normalize')
elif attention_type == 'use_default_input_feature':
attention_feature_map = feature_map
end_points['attention_feature_map'] = attention_feature_map
attention_outputs = self._PerformAttention(attention_feature_map,
feature_map,
attention_nonlinear, kernel)
prelogits, attention_prob, attention_score = attention_outputs
end_points['prelogits'] = prelogits
end_points['attention_prob'] = attention_prob
end_points['attention_score'] = attention_score
return prelogits, attention_prob, attention_score, end_points
def GetResnet50Subnetwork(self,
images,
is_training=False,
global_pool=False,
reuse=None):
"""Constructs resnet_v1_50 part of the DELF model.
Args:
images: A tensor of size [batch, height, width, channels].
is_training: Whether or not the model is in training mode.
global_pool: If True, perform global average pooling after feature
extraction. This may be useful for DELF's descriptor fine-tuning stage.
reuse: Whether or not the layer and its variables should be reused.
Returns:
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
If global_pool is True, height_out = width_out = 1.
end_points: A set of activations for external use.
"""
block = resnet_v1.resnet_v1_block
blocks = [
block('block1', base_depth=64, num_units=3, stride=2),
block('block2', base_depth=128, num_units=4, stride=2),
block('block3', base_depth=256, num_units=6, stride=2),
]
if self._target_layer_type == 'resnet_v1_50/block4':
blocks.append(block('block4', base_depth=512, num_units=3, stride=1))
net, end_points = resnet_v1.resnet_v1(
images,
blocks,
is_training=is_training,
global_pool=global_pool,
reuse=reuse,
scope='resnet_v1_50')
return net, end_points
def GetAttentionPrelogit(
self,
images,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False,
use_batch_norm=True):
"""Constructs attention model on resnet_v1_50.
Args:
images: A tensor of size [batch, height, width, channels].
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the attention part of the model is in
training mode.
reuse: Whether or not the layer and its variables should be reused.
use_batch_norm: Whether or not to use batch normalization.
Returns:
prelogits: A tensor of size [batch, 1, 1, channels].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
end_points: Set of activations for external use.
"""
# Construct Resnet50 features.
with arg_scope(resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
_, end_points = self.GetResnet50Subnetwork(
images, is_training=training_resnet, reuse=reuse)
feature_map = end_points[self._target_layer_type]
# Construct attention subnetwork on top of features.
with arg_scope(
resnet_v1.resnet_arg_scope(
weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
with arg_scope([layers.batch_norm], is_training=training_attention):
(prelogits, attention_prob, attention_score,
end_points) = self._GetAttentionSubnetwork(
feature_map,
end_points,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
reuse=reuse)
return prelogits, attention_prob, attention_score, feature_map, end_points
def _GetAttentionModel(
self,
images,
num_classes,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False):
"""Constructs attention model on resnet_v1_50.
Args:
images: A tensor of size [batch, height, width, channels]
num_classes: The number of output classes.
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the attention part of the model is in
training mode.
reuse: Whether or not the layer and its variables should be reused.
Returns:
logits: A tensor of size [batch, num_classes].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
"""
attention_feat, attention_prob, attention_score, feature_map, _ = (
self.GetAttentionPrelogit(
images,
weight_decay,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
training_resnet=training_resnet,
training_attention=training_attention,
reuse=reuse))
with arg_scope(
resnet_v1.resnet_arg_scope(
weight_decay=weight_decay, batch_norm_scale=True)):
with arg_scope([layers.batch_norm], is_training=training_attention):
with tf.compat.v1.variable_scope(
_ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
logits = layers.conv2d(
attention_feat,
num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
scope='logits')
logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
return logits, attention_prob, attention_score, feature_map
def AttentionModel(self,
images,
num_classes,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False):
"""Constructs attention based classification model for training.
Args:
images: A tensor of size [batch, height, width, channels]
num_classes: The number of output classes.
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the model is in training mode. Note
that this function only supports training the attention part of the
model, ie, the feature extraction layers are not trained.
reuse: Whether or not the layer and its variables should be reused.
Returns:
logit: A tensor of size [batch, num_classes]
attention: Attention score after the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
Raises:
ValueError: If unknown target_layer_type is provided.
"""
if 'resnet_v1_50' in self._target_layer_type:
net_outputs = self._GetAttentionModel(
images,
num_classes,
weight_decay,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
training_resnet=training_resnet,
training_attention=training_attention,
reuse=reuse)
logits, attention, _, feature_map = net_outputs
else:
raise ValueError('Unknown target_layer_type.')
return logits, attention, feature_map
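The file above is the TF1/tf-slim `DelfV1` implementation whose `delf.python.delf_v1` import is dropped from the extraction script earlier in this commit. For reference, a minimal graph-mode sketch of how such a model was typically driven; the input shape, dummy batch, and lack of checkpoint restoration are illustrative assumptions, not part of the commit:

import numpy as np
import tensorflow as tf

from delf.python import delf_v1

tf.compat.v1.disable_eager_execution()

# Placeholder for a batch of RGB images; 321x321 is only an example size.
images = tf.compat.v1.placeholder(tf.float32, shape=[None, 321, 321, 3])
model = delf_v1.DelfV1(target_layer_type='resnet_v1_50/block3')

# Aggregated prelogits plus the attention maps and the raw feature map.
prelogits, attention_prob, attention_score, feature_map, _ = (
    model.GetAttentionPrelogit(
        images,
        attention_nonlinear='softplus',
        attention_type='use_l2_normalized_feature',
        training_resnet=False,
        training_attention=False))

with tf.compat.v1.Session() as sess:
  # Random initialization just to exercise the graph; real use would restore
  # a trained checkpoint instead.
  sess.run(tf.compat.v1.global_variables_initializer())
  out = sess.run(feature_map,
                 feed_dict={images: np.zeros([1, 321, 321, 3], np.float32)})
  print(out.shape)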
...@@ -124,71 +124,70 @@ def ExtractAggregatedRepresentationsToFiles(image_names, features_dir,
if not tf.io.gfile.exists(output_aggregation_dir):
tf.io.gfile.makedirs(output_aggregation_dir)
with tf.compat.v1.Session() as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
start = time.clock()
start = time.time()
for i in range(num_images):
if i == 0:
print('Starting to extract aggregation from images...')
elif i % _STATUS_CHECK_ITERATIONS == 0:
elapsed = (time.clock() - start)
elapsed = (time.time() - start)
print('Processing image %d out of %d, last %d '
'images took %f seconds' %
(i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
start = time.clock()
start = time.time()
image_name = image_names[i]
# Compose output file name, skip extraction for this image if it already
# exists.
output_aggregation_filename = os.path.join(output_aggregation_dir,
image_name + output_extension)
if tf.io.gfile.exists(output_aggregation_filename):
print('Skipping %s' % image_name)
continue
# Load DELF features.
if config.use_regional_aggregation:
if not mapping_path:
raise ValueError(
'Requested regional aggregation, but mapping_path was not '
'provided')
descriptors_list = []
num_features_per_box = []
for box_feature_file in images_to_box_feature_files[image_name]:
delf_filename = os.path.join(features_dir,
box_feature_file + _DELF_EXTENSION)
_, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
# If `box_descriptors` is empty, reshape it such that it can be
# concatenated with other descriptors.
if not box_descriptors.shape[0]:
box_descriptors = np.reshape(box_descriptors,
[0, config.feature_dimensionality])
descriptors_list.append(box_descriptors)
num_features_per_box.append(box_descriptors.shape[0])
descriptors = np.concatenate(descriptors_list)
else:
input_delf_filename = os.path.join(features_dir,
image_name + _DELF_EXTENSION)
_, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
# If `descriptors` is empty, reshape it to avoid extraction failure.
if not descriptors.shape[0]:
descriptors = np.reshape(descriptors,
[0, config.feature_dimensionality])
num_features_per_box = None
# Extract and save aggregation. If using VLAD, only
# `aggregated_descriptors` needs to be saved.
(aggregated_descriptors,
feature_visual_words) = extractor.Extract(descriptors,
num_features_per_box)
if config.aggregation_type == _VLAD:
datum_io.WriteToFile(aggregated_descriptors,
output_aggregation_filename)
else:
datum_io.WritePairToFile(aggregated_descriptors,
feature_visual_words.astype('uint32'),
output_aggregation_filename)
...@@ -131,7 +131,7 @@ def main(argv):
delf_dataset = tf.data.Dataset.from_tensor_slices((features_placeholder))
delf_dataset = delf_dataset.shuffle(1000).batch(
features_for_clustering.shape[0])
iterator = delf_dataset.make_initializable_iterator()
iterator = tf.compat.v1.data.make_initializable_iterator(delf_dataset)
def _initializer_fn(sess):
"""Initialize dataset iterator, feed in the data."""
...
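The one-line change above moves from the removed `Dataset.make_initializable_iterator()` instance method to the `tf.compat.v1.data` helper, which takes the dataset as an argument. A small self-contained sketch of the same pattern; values and shapes are illustrative:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

features = np.random.rand(8, 4).astype(np.float32)
features_placeholder = tf.placeholder(tf.float32, features.shape)
dataset = tf.data.Dataset.from_tensor_slices(features_placeholder).batch(4)

# Equivalent of tf.compat.v1.data.make_initializable_iterator(dataset).
iterator = tf.data.make_initializable_iterator(dataset)
next_batch = iterator.get_next()

with tf.Session() as sess:
  sess.run(iterator.initializer, feed_dict={features_placeholder: features})
  print(sess.run(next_batch).shape)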
...@@ -102,7 +102,15 @@ def MakeExtractor(config):
Returns:
Function that receives an image and returns features.
Raises:
ValueError: if config is invalid.
"""
# Assert the configuration
if config.use_global_features and hasattr(
config, 'is_tf2_exported') and config.is_tf2_exported:
raise ValueError('use_global_features is incompatible with is_tf2_exported')
# Load model.
model = tf.saved_model.load(config.model_path)
...@@ -178,7 +186,8 @@ def MakeExtractor(config):
else:
global_pca_parameters['variances'] = None
model = model.prune(feeds=feeds, fetches=fetches)
if not hasattr(config, 'is_tf2_exported') or not config.is_tf2_exported:
model = model.prune(feeds=feeds, fetches=fetches)
def ExtractorFn(image, resize_factor=1.0):
"""Receives an image and returns DELF global and/or local features.
...@@ -197,7 +206,6 @@ def MakeExtractor(config):
features (key 'local_features' mapping to a dict with keys 'locations',
'descriptors', 'scales', 'attention').
"""
resized_image, scale_factors = ResizeImage(
image, config, resize_factor=resize_factor)
...@@ -224,8 +232,20 @@ def MakeExtractor(config):
output = None
if config.use_local_features:
output = model(image_tensor, image_scales_tensor, score_threshold_tensor,
max_feature_num_tensor)
if hasattr(config, 'is_tf2_exported') and config.is_tf2_exported:
predict = model.signatures['serving_default']
output_dict = predict(
input_image=image_tensor,
input_scales=image_scales_tensor,
input_max_feature_num=max_feature_num_tensor,
input_abs_thres=score_threshold_tensor)
output = [
output_dict['boxes'], output_dict['features'],
output_dict['scales'], output_dict['scores']
]
else:
output = model(image_tensor, image_scales_tensor,
score_threshold_tensor, max_feature_num_tensor)
else:
output = model(image_tensor, image_scales_tensor)
...
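For TF2-exported models the extractor stops pruning the SavedModel and instead calls its `serving_default` signature directly. A hedged sketch of that call path, using the input/output keys from the hunk above; the model path, dtypes, and scale values are assumptions:

import tensorflow as tf

# Hypothetical path to a TF2-exported DELF SavedModel.
model = tf.saved_model.load('/tmp/delf_tf2_model')
predict = model.signatures['serving_default']

# Dummy image; real code would pass the resized input image tensor.
image_tensor = tf.zeros([480, 640, 3], dtype=tf.uint8)
output_dict = predict(
    input_image=image_tensor,
    input_scales=tf.constant([0.7071, 1.0, 1.4142], dtype=tf.float32),
    input_max_feature_num=tf.constant(1000, dtype=tf.int32),
    input_abs_thres=tf.constant(100.0, dtype=tf.float32))

# The same four outputs the extractor unpacks: boxes, features, scales, scores.
locations = output_dict['boxes'].numpy()
descriptors = output_dict['features'].numpy()
scales = output_dict['scales'].numpy()
scores = output_dict['scores'].numpy()
print(locations.shape, descriptors.shape, scales.shape, scores.shape)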
...@@ -40,7 +40,6 @@ class ExtractAggregatedRepresentation(object):
"""Class for extraction of aggregated local feature representation.
Args:
sess: TensorFlow session to use.
aggregation_config: AggregationConfig object defining type of aggregation to
use.
...@@ -48,65 +47,28 @@ class ExtractAggregatedRepresentation(object):
ValueError: If aggregation type is invalid.
"""
def __init__(self, sess, aggregation_config):
def __init__(self, aggregation_config):
self._sess = sess
self._codebook_size = aggregation_config.codebook_size
self._feature_dimensionality = aggregation_config.feature_dimensionality
self._aggregation_type = aggregation_config.aggregation_type
self._feature_batch_size = aggregation_config.feature_batch_size
self._codebook_path = aggregation_config.codebook_path
self._use_regional_aggregation = aggregation_config.use_regional_aggregation
self._use_l2_normalization = aggregation_config.use_l2_normalization
self._num_assignments = aggregation_config.num_assignments
# Inputs to extraction function.
self._features = tf.compat.v1.placeholder(tf.float32, [None, None])
self._num_features_per_region = tf.compat.v1.placeholder(tf.int32, [None])
# Load codebook into graph.
codebook = tf.compat.v1.get_variable(
"codebook",
shape=[
aggregation_config.codebook_size,
aggregation_config.feature_dimensionality
])
tf.compat.v1.train.init_from_checkpoint(
aggregation_config.codebook_path, {_CLUSTER_CENTERS_VAR_NAME: codebook})
# Construct extraction graph based on desired options.
if self._aggregation_type == _VLAD:
# Feature visual words are unused in the case of VLAD, so just return
# dummy constant.
self._feature_visual_words = tf.constant(-1, dtype=tf.int32)
if aggregation_config.use_regional_aggregation:
self._aggregated_descriptors = self._ComputeRvlad(
self._features,
self._num_features_per_region,
codebook,
use_l2_normalization=aggregation_config.use_l2_normalization,
num_assignments=aggregation_config.num_assignments)
else:
self._aggregated_descriptors = self._ComputeVlad(
self._features,
codebook,
use_l2_normalization=aggregation_config.use_l2_normalization,
num_assignments=aggregation_config.num_assignments)
elif (self._aggregation_type == _ASMK or
self._aggregation_type == _ASMK_STAR):
if aggregation_config.use_regional_aggregation:
(self._aggregated_descriptors,
self._feature_visual_words) = self._ComputeRasmk(
self._features,
self._num_features_per_region,
codebook,
num_assignments=aggregation_config.num_assignments)
else:
(self._aggregated_descriptors,
self._feature_visual_words) = self._ComputeAsmk(
self._features,
codebook,
num_assignments=aggregation_config.num_assignments)
else:
raise ValueError("Invalid aggregation type: %d" % self._aggregation_type)
# Initialize variables in the TF graph.
sess.run(tf.compat.v1.global_variables_initializer())
if self._aggregation_type not in [_VLAD, _ASMK, _ASMK_STAR]:
raise ValueError("Invalid aggregation type: %d" % self._aggregation_type)
# Load codebook
codebook = tf.Variable(
tf.zeros([self._codebook_size, self._feature_dimensionality],
dtype=tf.float32),
name=_CLUSTER_CENTERS_VAR_NAME)
ckpt = tf.train.Checkpoint(codebook=codebook)
ckpt.restore(self._codebook_path)
self._codebook = codebook
def Extract(self, features, num_features_per_region=None):
"""Extracts aggregated representation.
...@@ -127,10 +89,13 @@ class ExtractAggregatedRepresentation(object):
Raises:
ValueError: If inputs are misconfigured.
"""
features = tf.cast(features, dtype=tf.float32)
if num_features_per_region is None:
# Use dummy value since it is unused.
num_features_per_region = []
else:
num_features_per_region = tf.cast(num_features_per_region, dtype=tf.int32)
if len(num_features_per_region
) and sum(num_features_per_region) != features.shape[0]:
raise ValueError(
...@@ -138,12 +103,41 @@ class ExtractAggregatedRepresentation(object):
"features.shape[0] are different: %d vs %d" %
(sum(num_features_per_region), features.shape[0]))
aggregated_descriptors, feature_visual_words = self._sess.run(
[self._aggregated_descriptors, self._feature_visual_words],
feed_dict={
self._features: features,
self._num_features_per_region: num_features_per_region
})
# Extract features based on desired options.
if self._aggregation_type == _VLAD:
# Feature visual words are unused in the case of VLAD, so just return
# dummy constant.
feature_visual_words = tf.constant(-1, dtype=tf.int32)
if self._use_regional_aggregation:
aggregated_descriptors = self._ComputeRvlad(
features,
num_features_per_region,
self._codebook,
use_l2_normalization=self._use_l2_normalization,
num_assignments=self._num_assignments)
else:
aggregated_descriptors = self._ComputeVlad(
features,
self._codebook,
use_l2_normalization=self._use_l2_normalization,
num_assignments=self._num_assignments)
elif (self._aggregation_type == _ASMK or
self._aggregation_type == _ASMK_STAR):
if self._use_regional_aggregation:
(aggregated_descriptors,
feature_visual_words) = self._ComputeRasmk(
features,
num_features_per_region,
self._codebook,
num_assignments=self._num_assignments)
else:
(aggregated_descriptors,
feature_visual_words) = self._ComputeAsmk(
features,
self._codebook,
num_assignments=self._num_assignments)
feature_visual_words_output = feature_visual_words.numpy()
# If using ASMK*/RASMK*, binarize the aggregated descriptors.
if self._aggregation_type == _ASMK_STAR:
...@@ -151,9 +145,11 @@ class ExtractAggregatedRepresentation(object):
aggregated_descriptors, [-1, self._feature_dimensionality])
packed_descriptors = np.packbits(
reshaped_aggregated_descriptors > 0, axis=1)
aggregated_descriptors = np.reshape(packed_descriptors, [-1])
aggregated_descriptors_output = np.reshape(packed_descriptors, [-1])
else:
aggregated_descriptors_output = aggregated_descriptors.numpy()
return aggregated_descriptors, feature_visual_words
return aggregated_descriptors_output, feature_visual_words_output
def _ComputeVlad(self,
features,
...@@ -268,11 +264,12 @@ class ExtractAggregatedRepresentation(object):
output_vlad: VLAD descriptor updated to take into account contribution
from ind-th feature.
"""
return ind + 1, tf.tensor_scatter_nd_add(
vlad, tf.expand_dims(selected_visual_words[ind], axis=1),
tf.tile(
tf.expand_dims(features[ind], axis=0), [num_assignments, 1]) -
tf.gather(codebook, selected_visual_words[ind]))
diff = tf.tile(
tf.expand_dims(features[ind],
axis=0), [num_assignments, 1]) - tf.gather(
codebook, selected_visual_words[ind])
return ind + 1, tf.tensor_scatter_nd_add(
vlad, tf.expand_dims(selected_visual_words[ind], axis=1), diff)
ind_vlad = tf.constant(0, dtype=tf.int32)
keep_going = lambda j, vlad: tf.less(j, num_features)
...
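With the session removed, the aggregation extractor now restores the codebook with `tf.train.Checkpoint` and runs `Extract()` eagerly. A minimal usage sketch under assumed config values; the codebook checkpoint is presumed to exist, and `_VLAD` is the module-level constant referenced in the diff above:

import numpy as np

from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor

config = aggregation_config_pb2.AggregationConfig()
config.codebook_size = 5
config.feature_dimensionality = 2
# The VLAD enum value; accessed via the module constant used in the diff.
config.aggregation_type = feature_aggregation_extractor._VLAD
config.codebook_path = '/tmp/test_codebook'  # hypothetical checkpoint prefix
config.num_assignments = 1

# No tf.Session is needed any more; the codebook is restored inside __init__.
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(config)
features = np.array([[1.0, 0.0], [-1.0, 1.0]], dtype=np.float32)
vlad, _ = extractor.Extract(features)
print(vlad.shape)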
...@@ -20,12 +20,15 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor
FLAGS = flags.FLAGS
class FeatureAggregationTest(tf.test.TestCase):
...@@ -35,17 +38,15 @@ class FeatureAggregationTest(tf.test.TestCase):
Args:
checkpoint_path: Directory where codebook is saved to.
"""
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
codebook = tf.Variable(
[[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
name='clusters')
saver = tf.compat.v1.train.Saver([codebook])
sess.run(tf.compat.v1.global_variables_initializer())
saver.save(sess, checkpoint_path)
codebook = tf.Variable(
[[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
name='clusters',
dtype=tf.float32)
ckpt = tf.train.Checkpoint(codebook=codebook)
ckpt.write(checkpoint_path)
def setUp(self):
self._codebook_path = os.path.join(tf.compat.v1.test.get_temp_dir(),
'test_codebook')
self._codebook_path = os.path.join(FLAGS.test_tmpdir, 'test_codebook')
self._CreateCodebook(self._codebook_path)
def testComputeNormalizedVladWorks(self):
...@@ -61,10 +62,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [
...@@ -90,10 +90,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.feature_batch_size = 2
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [
...@@ -118,10 +117,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.5, 1.0, 1.0]
...@@ -144,10 +142,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 3
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, -0.5, 0.5, 0.0, 0.0]
...@@ -168,10 +165,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.codebook_path = self._codebook_path
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = np.zeros([10], dtype=float)
...@@ -197,10 +193,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -228,10 +223,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -256,10 +250,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = np.zeros([10], dtype=float)
...@@ -286,10 +279,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -318,10 +310,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -349,14 +340,13 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
with self.assertRaisesRegex(
ValueError,
r'Incorrect arguments: sum\(num_features_per_region\) and '
r'features.shape\[0\] are different'):
extractor.Extract(features, num_features_per_region)
def testComputeAsmkWorks(self):
# Construct inputs.
...@@ -370,10 +360,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk = [-0.707107, 0.707107, 0.707107, 0.707107]
...@@ -395,10 +384,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk_star, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk_star = [64, 192]
...@@ -420,10 +408,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 3
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk = [0.707107, 0.707107, 0.0, 1.0, -0.707107, 0.707107]
...@@ -448,10 +435,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rasmk, visual_words = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rasmk = [-0.707107, 0.707107, 0.361261, 0.932465]
...@@ -476,11 +462,10 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rasmk_star, visual_words = extractor.Extract(features,
num_features_per_region)
# Define expected results.
exp_rasmk_star = [64, 192]
...@@ -500,10 +485,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
if __name__ == '__main__':
...