Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
31ca3b97
Commit
31ca3b97
authored
Jul 23, 2020
by
Kaushik Shivakumar
Browse files
resovle merge conflicts
parents
3e9d886d
7fcd7cba
Changes
392
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1033 additions
and
323 deletions
+1033
-323
research/attention_ocr/python/model.py
research/attention_ocr/python/model.py
+303
-129
research/attention_ocr/python/model_export.py
research/attention_ocr/python/model_export.py
+198
-0
research/attention_ocr/python/model_export_lib.py
research/attention_ocr/python/model_export_lib.py
+108
-0
research/attention_ocr/python/model_export_test.py
research/attention_ocr/python/model_export_test.py
+160
-0
research/attention_ocr/python/model_test.py
research/attention_ocr/python/model_test.py
+71
-49
research/attention_ocr/python/sequence_layers.py
research/attention_ocr/python/sequence_layers.py
+12
-12
research/attention_ocr/python/sequence_layers_test.py
research/attention_ocr/python/sequence_layers_test.py
+2
-2
research/attention_ocr/python/train.py
research/attention_ocr/python/train.py
+11
-11
research/attention_ocr/python/utils.py
research/attention_ocr/python/utils.py
+23
-6
research/deeplab/datasets/build_cityscapes_data.py
research/deeplab/datasets/build_cityscapes_data.py
+17
-7
research/deeplab/datasets/convert_cityscapes.sh
research/deeplab/datasets/convert_cityscapes.sh
+2
-0
research/deeplab/deprecated/segmentation_dataset.py
research/deeplab/deprecated/segmentation_dataset.py
+2
-2
research/deeplab/g3doc/cityscapes.md
research/deeplab/g3doc/cityscapes.md
+3
-3
research/deeplab/g3doc/installation.md
research/deeplab/g3doc/installation.md
+1
-1
research/deeplab/local_test.sh
research/deeplab/local_test.sh
+2
-2
research/delf/INSTALL_INSTRUCTIONS.md
research/delf/INSTALL_INSTRUCTIONS.md
+38
-3
research/delf/README.md
research/delf/README.md
+52
-91
research/delf/delf/protos/delf_config.proto
research/delf/delf/protos/delf_config.proto
+3
-0
research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py
...f/delf/python/detect_to_retrieve/cluster_delf_features.py
+1
-1
research/delf/delf/python/examples/extractor.py
research/delf/delf/python/examples/extractor.py
+24
-4
No files found.
research/attention_ocr/python/model.py
View file @
31ca3b97
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to build the Attention OCR model.
Usage example:
...
...
@@ -26,6 +25,7 @@ Usage example:
import
sys
import
collections
import
logging
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
from
tensorflow.contrib.slim.nets
import
inception
...
...
@@ -35,29 +35,28 @@ import sequence_layers
import
utils
OutputEndpoints
=
collections
.
namedtuple
(
'OutputEndpoints'
,
[
'chars_logit'
,
'chars_log_prob'
,
'predicted_chars'
,
'predicted_scores'
,
'predicted_text'
'chars_logit'
,
'chars_log_prob'
,
'predicted_chars'
,
'predicted_scores'
,
'predicted_text'
,
'predicted_length'
,
'predicted_conf'
,
'normalized_seq_conf'
])
# TODO(gorban): replace with tf.HParams when it is released.
ModelParams
=
collections
.
namedtuple
(
'ModelParams'
,
[
'num_char_classes'
,
'seq_length'
,
'num_views'
,
'null_code'
])
ModelParams
=
collections
.
namedtuple
(
'ModelParams'
,
[
'num_char_classes'
,
'seq_length'
,
'num_views'
,
'null_code'
])
ConvTowerParams
=
collections
.
namedtuple
(
'ConvTowerParams'
,
[
'final_endpoint'
])
SequenceLogitsParams
=
collections
.
namedtuple
(
'SequenceLogitsParams'
,
[
'use_attention'
,
'use_autoregression'
,
'num_lstm_units'
,
'weight_decay'
,
'lstm_state_clip_value'
'use_attention'
,
'use_autoregression'
,
'num_lstm_units'
,
'weight_decay'
,
'lstm_state_clip_value'
])
SequenceLossParams
=
collections
.
namedtuple
(
'SequenceLossParams'
,
[
'label_smoothing'
,
'ignore_nulls'
,
'average_across_timestep
s'
])
SequenceLossParams
=
collections
.
namedtuple
(
'SequenceLossParam
s'
,
[
'label_smoothing'
,
'ignore_nulls'
,
'average_across_timesteps'
])
EncodeCoordinatesParams
=
collections
.
namedtuple
(
'EncodeCoordinatesParams'
,
[
'enabled'
])
EncodeCoordinatesParams
=
collections
.
namedtuple
(
'EncodeCoordinatesParams'
,
[
'enabled'
])
def
_dict_to_array
(
id_to_char
,
default_character
):
...
...
@@ -85,16 +84,16 @@ class CharsetMapper(object):
"""
mapping_strings
=
tf
.
constant
(
_dict_to_array
(
charset
,
default_character
))
self
.
table
=
tf
.
contrib
.
lookup
.
index_to_string_table_from_tensor
(
mapping
=
mapping_strings
,
default_value
=
default_character
)
mapping
=
mapping_strings
,
default_value
=
default_character
)
def
get_text
(
self
,
ids
):
"""Returns a string corresponding to a sequence of character ids.
Args:
ids: a tensor with shape [batch_size, max_sequence_length]
"""
return
tf
.
reduce_join
(
self
.
table
.
lookup
(
tf
.
to_int64
(
ids
)),
reduction_indice
s
=
1
)
"""
return
tf
.
strings
.
reduce_join
(
inputs
=
self
.
table
.
lookup
(
tf
.
cast
(
ids
,
dtype
=
tf
.
int64
)),
axi
s
=
1
)
def
get_softmax_loss_fn
(
label_smoothing
):
...
...
@@ -111,16 +110,153 @@ def get_softmax_loss_fn(label_smoothing):
def
loss_fn
(
labels
,
logits
):
return
(
tf
.
nn
.
softmax_cross_entropy_with_logits
(
logits
=
logits
,
labels
=
labels
))
logits
=
logits
,
labels
=
tf
.
stop_gradient
(
labels
))
)
else
:
def
loss_fn
(
labels
,
logits
):
return
tf
.
nn
.
sparse_softmax_cross_entropy_with_logits
(
logits
=
logits
,
labels
=
labels
)
logits
=
logits
,
labels
=
labels
)
return
loss_fn
def
get_tensor_dimensions
(
tensor
):
"""Returns the shape components of a 4D tensor with variable batch size.
Args:
tensor : A 4D tensor, whose last 3 dimensions are known at graph
construction time.
Returns:
batch_size : The first dimension as a tensor object.
height : The second dimension as a scalar value.
width : The third dimension as a scalar value.
num_features : The forth dimension as a scalar value.
Raises:
ValueError: if input tensor does not have 4 dimensions.
"""
if
len
(
tensor
.
get_shape
().
dims
)
!=
4
:
raise
ValueError
(
'Incompatible shape: len(tensor.get_shape().dims) != 4 (%d != 4)'
%
len
(
tensor
.
get_shape
().
dims
))
batch_size
=
tf
.
shape
(
input
=
tensor
)[
0
]
height
=
tensor
.
get_shape
().
dims
[
1
].
value
width
=
tensor
.
get_shape
().
dims
[
2
].
value
num_features
=
tensor
.
get_shape
().
dims
[
3
].
value
return
batch_size
,
height
,
width
,
num_features
def
lookup_indexed_value
(
indices
,
row_vecs
):
"""Lookup values in each row of 'row_vecs' indexed by 'indices'.
For each sample in the batch, look up the element for the corresponding
index.
Args:
indices : A tensor of shape (batch, )
row_vecs : A tensor of shape [batch, depth]
Returns:
A tensor of shape (batch, ) formed by row_vecs[i, indices[i]].
"""
gather_indices
=
tf
.
stack
((
tf
.
range
(
tf
.
shape
(
input
=
row_vecs
)[
0
],
dtype
=
tf
.
int32
),
tf
.
cast
(
indices
,
tf
.
int32
)),
axis
=
1
)
return
tf
.
gather_nd
(
row_vecs
,
gather_indices
)
@
utils
.
ConvertAllInputsToTensors
def
max_char_logprob_cumsum
(
char_log_prob
):
"""Computes the cumulative sum of character logprob for all sequence lengths.
Args:
char_log_prob: A tensor of shape [batch x seq_length x num_char_classes]
with log probabilities of a character.
Returns:
A tensor of shape [batch x (seq_length+1)] where each element x[_, j] is
the sum of the max char logprob for all positions upto j.
Note this duplicates the final column and produces (seq_length+1) columns
so the same function can be used regardless whether use_length_predictions
is true or false.
"""
max_char_log_prob
=
tf
.
reduce_max
(
input_tensor
=
char_log_prob
,
axis
=
2
)
# For an input array [a, b, c]) tf.cumsum returns [a, a + b, a + b + c] if
# exclusive set to False (default).
return
tf
.
cumsum
(
max_char_log_prob
,
axis
=
1
,
exclusive
=
False
)
def
find_length_by_null
(
predicted_chars
,
null_code
):
"""Determine sequence length by finding null_code among predicted char IDs.
Given the char class ID for each position, compute the sequence length.
Note that this function computes this based on the number of null_code,
instead of the position of the first null_code.
Args:
predicted_chars: A tensor of [batch x seq_length] where each element stores
the char class ID with max probability;
null_code: an int32, character id for the NULL.
Returns:
A [batch, ] tensor which stores the sequence length for each sample.
"""
return
tf
.
reduce_sum
(
input_tensor
=
tf
.
cast
(
tf
.
not_equal
(
null_code
,
predicted_chars
),
tf
.
int32
),
axis
=
1
)
def
axis_pad
(
tensor
,
axis
,
before
=
0
,
after
=
0
,
constant_values
=
0.0
):
"""Pad a tensor with the specified values along a single axis.
Args:
tensor: a Tensor;
axis: the dimension to add pad along to;
before: number of values to add before the contents of tensor in the
selected dimension;
after: number of values to add after the contents of tensor in the selected
dimension;
constant_values: the scalar pad value to use. Must be same type as tensor.
Returns:
A Tensor. Has the same type as the input tensor, but with a changed shape
along the specified dimension.
"""
if
before
==
0
and
after
==
0
:
return
tensor
ndims
=
tensor
.
shape
.
ndims
padding_size
=
np
.
zeros
((
ndims
,
2
),
dtype
=
'int32'
)
padding_size
[
axis
]
=
before
,
after
return
tf
.
pad
(
tensor
=
tensor
,
paddings
=
tf
.
constant
(
padding_size
),
constant_values
=
constant_values
)
def
null_based_length_prediction
(
chars_log_prob
,
null_code
):
"""Computes length and confidence of prediction based on positions of NULLs.
Args:
chars_log_prob: A tensor of shape [batch x seq_length x num_char_classes]
with log probabilities of a character;
null_code: an int32, character id for the NULL.
Returns:
A tuple (text_log_prob, predicted_length), where
text_log_prob - is a tensor of the same shape as length_log_prob.
Element #0 of the output corresponds to probability of the empty string,
element #seq_length - is the probability of length=seq_length.
predicted_length is a tensor with shape [batch].
"""
predicted_chars
=
tf
.
cast
(
tf
.
argmax
(
input
=
chars_log_prob
,
axis
=
2
),
dtype
=
tf
.
int32
)
# We do right pad to support sequences with seq_length elements.
text_log_prob
=
max_char_logprob_cumsum
(
axis_pad
(
chars_log_prob
,
axis
=
1
,
after
=
1
))
predicted_length
=
find_length_by_null
(
predicted_chars
,
null_code
)
return
text_log_prob
,
predicted_length
class
Model
(
object
):
"""Class to create the Attention OCR Model."""
...
...
@@ -137,24 +273,24 @@ class Model(object):
num_char_classes: size of character set.
seq_length: number of characters in a sequence.
num_views: Number of views (conv towers) to use.
null_code: A character code corresponding to a character which
indicates
end of a sequence.
mparams: a dictionary with hyper parameters for methods, keys -
function
names, values - corresponding namedtuples.
null_code: A character code corresponding to a character which
indicates
end of a sequence.
mparams: a dictionary with hyper parameters for methods, keys -
function
names, values - corresponding namedtuples.
charset: an optional dictionary with a mapping between character ids and
utf8 strings. If specified the OutputEndpoints.predicted_text will
utf8
encoded strings corresponding to the character ids returned by
utf8 strings. If specified the OutputEndpoints.predicted_text will
utf8
encoded strings corresponding to the character ids returned by
OutputEndpoints.predicted_chars (by default the predicted_text contains
an empty vector).
an empty vector).
NOTE: Make sure you call tf.tables_initializer().run() if the charset
specified.
specified.
"""
super
(
Model
,
self
).
__init__
()
self
.
_params
=
ModelParams
(
num_char_classes
=
num_char_classes
,
seq_length
=
seq_length
,
num_views
=
num_views
,
null_code
=
null_code
)
num_char_classes
=
num_char_classes
,
seq_length
=
seq_length
,
num_views
=
num_views
,
null_code
=
null_code
)
self
.
_mparams
=
self
.
default_mparams
()
if
mparams
:
self
.
_mparams
.
update
(
mparams
)
...
...
@@ -162,21 +298,22 @@ class Model(object):
def
default_mparams
(
self
):
return
{
'conv_tower_fn'
:
ConvTowerParams
(
final_endpoint
=
'Mixed_5d'
),
'sequence_logit_fn'
:
SequenceLogitsParams
(
use_attention
=
True
,
use_autoregression
=
True
,
num_lstm_units
=
256
,
weight_decay
=
0.00004
,
lstm_state_clip_value
=
10.0
),
'sequence_loss_fn'
:
SequenceLossParams
(
label_smoothing
=
0.1
,
ignore_nulls
=
True
,
average_across_timesteps
=
False
),
'encode_coordinates_fn'
:
EncodeCoordinatesParams
(
enabled
=
False
)
'conv_tower_fn'
:
ConvTowerParams
(
final_endpoint
=
'Mixed_5d'
),
'sequence_logit_fn'
:
SequenceLogitsParams
(
use_attention
=
True
,
use_autoregression
=
True
,
num_lstm_units
=
256
,
weight_decay
=
0.00004
,
lstm_state_clip_value
=
10.0
),
'sequence_loss_fn'
:
SequenceLossParams
(
label_smoothing
=
0.1
,
ignore_nulls
=
True
,
average_across_timesteps
=
False
),
'encode_coordinates_fn'
:
EncodeCoordinatesParams
(
enabled
=
False
)
}
def
set_mparam
(
self
,
function
,
**
kwargs
):
...
...
@@ -198,14 +335,14 @@ class Model(object):
"""
mparams
=
self
.
_mparams
[
'conv_tower_fn'
]
logging
.
debug
(
'Using final_endpoint=%s'
,
mparams
.
final_endpoint
)
with
tf
.
variable_scope
(
'conv_tower_fn/INCE'
):
with
tf
.
compat
.
v1
.
variable_scope
(
'conv_tower_fn/INCE'
):
if
reuse
:
tf
.
get_variable_scope
().
reuse_variables
()
tf
.
compat
.
v1
.
get_variable_scope
().
reuse_variables
()
with
slim
.
arg_scope
(
inception
.
inception_v3_arg_scope
()):
with
slim
.
arg_scope
([
slim
.
batch_norm
,
slim
.
dropout
],
is_training
=
is_training
):
net
,
_
=
inception
.
inception_v3_base
(
images
,
final_endpoint
=
mparams
.
final_endpoint
)
images
,
final_endpoint
=
mparams
.
final_endpoint
)
return
net
def
_create_lstm_inputs
(
self
,
net
):
...
...
@@ -222,10 +359,10 @@ class Model(object):
"""
num_features
=
net
.
get_shape
().
dims
[
1
].
value
if
num_features
<
self
.
_params
.
seq_length
:
raise
AssertionError
(
'Incorrect dimension #1 of input tensor'
' %d should be bigger than %d (shape=%s)'
%
(
num_features
,
self
.
_params
.
seq_length
,
net
.
get_shape
()))
raise
AssertionError
(
'Incorrect dimension #1 of input tensor'
' %d should be bigger than %d (shape=%s)'
%
(
num_features
,
self
.
_params
.
seq_length
,
net
.
get_shape
()))
elif
num_features
>
self
.
_params
.
seq_length
:
logging
.
warning
(
'Ignoring some features: use %d of %d (shape=%s)'
,
self
.
_params
.
seq_length
,
num_features
,
net
.
get_shape
())
...
...
@@ -236,7 +373,7 @@ class Model(object):
def
sequence_logit_fn
(
self
,
net
,
labels_one_hot
):
mparams
=
self
.
_mparams
[
'sequence_logit_fn'
]
# TODO(gorban): remove /alias suffixes from the scopes.
with
tf
.
variable_scope
(
'sequence_logit_fn/SQLR'
):
with
tf
.
compat
.
v1
.
variable_scope
(
'sequence_logit_fn/SQLR'
):
layer_class
=
sequence_layers
.
get_layer_class
(
mparams
.
use_attention
,
mparams
.
use_autoregression
)
layer
=
layer_class
(
net
,
labels_one_hot
,
self
.
_params
,
mparams
)
...
...
@@ -252,16 +389,16 @@ class Model(object):
A tensor with the same size as any input tensors.
"""
batch_size
,
height
,
width
,
num_features
=
[
d
.
value
for
d
in
nets_list
[
0
].
get_shape
().
dims
d
.
value
for
d
in
nets_list
[
0
].
get_shape
().
dims
]
xy_flat_shape
=
(
batch_size
,
1
,
height
*
width
,
num_features
)
nets_for_merge
=
[]
with
tf
.
variable_scope
(
'max_pool_views'
,
values
=
nets_list
):
with
tf
.
compat
.
v1
.
variable_scope
(
'max_pool_views'
,
values
=
nets_list
):
for
net
in
nets_list
:
nets_for_merge
.
append
(
tf
.
reshape
(
net
,
xy_flat_shape
))
merged_net
=
tf
.
concat
(
nets_for_merge
,
1
)
net
=
slim
.
max_pool2d
(
merged_net
,
kernel_size
=
[
len
(
nets_list
),
1
],
stride
=
1
)
merged_net
,
kernel_size
=
[
len
(
nets_list
),
1
],
stride
=
1
)
net
=
tf
.
reshape
(
net
,
(
batch_size
,
height
,
width
,
num_features
))
return
net
...
...
@@ -277,18 +414,20 @@ class Model(object):
Returns:
A tensor of shape [batch_size, seq_length, features_size].
"""
with
tf
.
variable_scope
(
'pool_views_fn/STCK'
):
with
tf
.
compat
.
v1
.
variable_scope
(
'pool_views_fn/STCK'
):
net
=
tf
.
concat
(
nets
,
1
)
batch_size
=
net
.
get_shape
().
dims
[
0
].
value
batch_size
=
tf
.
shape
(
input
=
net
)[
0
]
image_size
=
net
.
get_shape
().
dims
[
1
].
value
*
\
net
.
get_shape
().
dims
[
2
].
value
feature_size
=
net
.
get_shape
().
dims
[
3
].
value
return
tf
.
reshape
(
net
,
[
batch_size
,
-
1
,
feature_size
])
return
tf
.
reshape
(
net
,
tf
.
stack
(
[
batch_size
,
image_size
,
feature_size
])
)
def
char_predictions
(
self
,
chars_logit
):
"""Returns confidence scores (softmax values) for predicted characters.
Args:
chars_logit: chars logits, a tensor with shape
[batch_size x seq_length x
num_char_classes]
chars_logit: chars logits, a tensor with shape
[batch_size x seq_length x
num_char_classes]
Returns:
A tuple (ids, log_prob, scores), where:
...
...
@@ -301,12 +440,17 @@ class Model(object):
with shape [batch_size x seq_length].
"""
log_prob
=
utils
.
logits_to_log_prob
(
chars_logit
)
ids
=
tf
.
to_int32
(
tf
.
argmax
(
log_prob
,
axis
=
2
),
name
=
'predicted_chars'
)
ids
=
tf
.
cast
(
tf
.
argmax
(
input
=
log_prob
,
axis
=
2
),
name
=
'predicted_chars'
,
dtype
=
tf
.
int32
)
mask
=
tf
.
cast
(
slim
.
one_hot_encoding
(
ids
,
self
.
_params
.
num_char_classes
),
tf
.
bool
)
slim
.
one_hot_encoding
(
ids
,
self
.
_params
.
num_char_classes
),
tf
.
bool
)
all_scores
=
tf
.
nn
.
softmax
(
chars_logit
)
selected_scores
=
tf
.
boolean_mask
(
all_scores
,
mask
,
name
=
'char_scores'
)
scores
=
tf
.
reshape
(
selected_scores
,
shape
=
(
-
1
,
self
.
_params
.
seq_length
))
selected_scores
=
tf
.
boolean_mask
(
tensor
=
all_scores
,
mask
=
mask
,
name
=
'char_scores'
)
scores
=
tf
.
reshape
(
selected_scores
,
shape
=
(
-
1
,
self
.
_params
.
seq_length
),
name
=
'predicted_scores'
)
return
ids
,
log_prob
,
scores
def
encode_coordinates_fn
(
self
,
net
):
...
...
@@ -323,12 +467,12 @@ class Model(object):
"""
mparams
=
self
.
_mparams
[
'encode_coordinates_fn'
]
if
mparams
.
enabled
:
batch_size
,
h
,
w
,
_
=
n
et
.
shape
.
as_list
(
)
batch_size
,
h
,
w
,
_
=
g
et
_tensor_dimensions
(
net
)
x
,
y
=
tf
.
meshgrid
(
tf
.
range
(
w
),
tf
.
range
(
h
))
w_loc
=
slim
.
one_hot_encoding
(
x
,
num_classes
=
w
)
h_loc
=
slim
.
one_hot_encoding
(
y
,
num_classes
=
h
)
loc
=
tf
.
concat
([
h_loc
,
w_loc
],
2
)
loc
=
tf
.
tile
(
tf
.
expand_dims
(
loc
,
0
),
[
batch_size
,
1
,
1
,
1
])
loc
=
tf
.
tile
(
tf
.
expand_dims
(
loc
,
0
),
tf
.
stack
(
[
batch_size
,
1
,
1
,
1
])
)
return
tf
.
concat
([
net
,
loc
],
3
)
else
:
return
net
...
...
@@ -341,7 +485,8 @@ class Model(object):
"""Creates a base part of the Model (no gradients, losses or summaries).
Args:
images: A tensor of shape [batch_size, height, width, channels].
images: A tensor of shape [batch_size, height, width, channels] with pixel
values in the range [0.0, 1.0].
labels_one_hot: Optional (can be None) one-hot encoding for ground truth
labels. If provided the function will create a model for training.
scope: Optional variable_scope.
...
...
@@ -353,14 +498,19 @@ class Model(object):
"""
logging
.
debug
(
'images: %s'
,
images
)
is_training
=
labels_one_hot
is
not
None
with
tf
.
variable_scope
(
scope
,
reuse
=
reuse
):
# Normalize image pixel values to have a symmetrical range around zero.
images
=
tf
.
subtract
(
images
,
0.5
)
images
=
tf
.
multiply
(
images
,
2.5
)
with
tf
.
compat
.
v1
.
variable_scope
(
scope
,
reuse
=
reuse
):
views
=
tf
.
split
(
value
=
images
,
num_or_size_splits
=
self
.
_params
.
num_views
,
axis
=
2
)
value
=
images
,
num_or_size_splits
=
self
.
_params
.
num_views
,
axis
=
2
)
logging
.
debug
(
'Views=%d single view: %s'
,
len
(
views
),
views
[
0
])
nets
=
[
self
.
conv_tower_fn
(
v
,
is_training
,
reuse
=
(
i
!=
0
))
for
i
,
v
in
enumerate
(
views
)
self
.
conv_tower_fn
(
v
,
is_training
,
reuse
=
(
i
!=
0
))
for
i
,
v
in
enumerate
(
views
)
]
logging
.
debug
(
'Conv tower: %s'
,
nets
[
0
])
...
...
@@ -374,18 +524,34 @@ class Model(object):
logging
.
debug
(
'chars_logit: %s'
,
chars_logit
)
predicted_chars
,
chars_log_prob
,
predicted_scores
=
(
self
.
char_predictions
(
chars_logit
))
self
.
char_predictions
(
chars_logit
))
if
self
.
_charset
:
character_mapper
=
CharsetMapper
(
self
.
_charset
)
predicted_text
=
character_mapper
.
get_text
(
predicted_chars
)
else
:
predicted_text
=
tf
.
constant
([])
text_log_prob
,
predicted_length
=
null_based_length_prediction
(
chars_log_prob
,
self
.
_params
.
null_code
)
predicted_conf
=
lookup_indexed_value
(
predicted_length
,
text_log_prob
)
# Convert predicted confidence from sum of logs to geometric mean
normalized_seq_conf
=
tf
.
exp
(
tf
.
divide
(
predicted_conf
,
tf
.
cast
(
predicted_length
+
1
,
predicted_conf
.
dtype
)),
name
=
'normalized_seq_conf'
)
predicted_conf
=
tf
.
identity
(
predicted_conf
,
name
=
'predicted_conf'
)
predicted_text
=
tf
.
identity
(
predicted_text
,
name
=
'predicted_text'
)
predicted_length
=
tf
.
identity
(
predicted_length
,
name
=
'predicted_length'
)
return
OutputEndpoints
(
chars_logit
=
chars_logit
,
chars_log_prob
=
chars_log_prob
,
predicted_chars
=
predicted_chars
,
predicted_scores
=
predicted_scores
,
predicted_text
=
predicted_text
)
chars_logit
=
chars_logit
,
chars_log_prob
=
chars_log_prob
,
predicted_chars
=
predicted_chars
,
predicted_scores
=
predicted_scores
,
predicted_length
=
predicted_length
,
predicted_text
=
predicted_text
,
predicted_conf
=
predicted_conf
,
normalized_seq_conf
=
normalized_seq_conf
)
def
create_loss
(
self
,
data
,
endpoints
):
"""Creates all losses required to train the model.
...
...
@@ -404,7 +570,7 @@ class Model(object):
# multiple losses including regularization losses.
self
.
sequence_loss_fn
(
endpoints
.
chars_logit
,
data
.
labels
)
total_loss
=
slim
.
losses
.
get_total_loss
()
tf
.
summary
.
scalar
(
'TotalLoss'
,
total_loss
)
tf
.
compat
.
v1
.
summary
.
scalar
(
'TotalLoss'
,
total_loss
)
return
total_loss
def
label_smoothing_regularization
(
self
,
chars_labels
,
weight
=
0.1
):
...
...
@@ -413,15 +579,15 @@ class Model(object):
Uses the same method as in https://arxiv.org/abs/1512.00567.
Args:
chars_labels: ground truth ids of charactes,
shape=[batch_size,
seq_length];
chars_labels: ground truth ids of charactes,
shape=[batch_size,
seq_length];
weight: label-smoothing regularization weight.
Returns:
A sensor with the same shape as the input.
"""
one_hot_labels
=
tf
.
one_hot
(
chars_labels
,
depth
=
self
.
_params
.
num_char_classes
,
axis
=-
1
)
chars_labels
,
depth
=
self
.
_params
.
num_char_classes
,
axis
=-
1
)
pos_weight
=
1.0
-
weight
neg_weight
=
weight
/
self
.
_params
.
num_char_classes
return
one_hot_labels
*
pos_weight
+
neg_weight
...
...
@@ -433,20 +599,20 @@ class Model(object):
also ignore all null chars after the first one.
Args:
chars_logits: logits for predicted characters,
shape=[batch_size,
seq_length, num_char_classes];
chars_labels: ground truth ids of characters,
shape=[batch_size,
seq_length];
chars_logits: logits for predicted characters,
shape=[batch_size,
seq_length, num_char_classes];
chars_labels: ground truth ids of characters,
shape=[batch_size,
seq_length];
mparams: method hyper parameters.
Returns:
A Tensor with shape [batch_size] - the log-perplexity for each sequence.
"""
mparams
=
self
.
_mparams
[
'sequence_loss_fn'
]
with
tf
.
variable_scope
(
'sequence_loss_fn/SLF'
):
with
tf
.
compat
.
v1
.
variable_scope
(
'sequence_loss_fn/SLF'
):
if
mparams
.
label_smoothing
>
0
:
smoothed_one_hot_labels
=
self
.
label_smoothing_regularization
(
chars_labels
,
mparams
.
label_smoothing
)
chars_labels
,
mparams
.
label_smoothing
)
labels_list
=
tf
.
unstack
(
smoothed_one_hot_labels
,
axis
=
1
)
else
:
# NOTE: in case of sparse softmax we are not using one-hot
...
...
@@ -459,21 +625,21 @@ class Model(object):
else
:
# Suppose that reject character is the last in the charset.
reject_char
=
tf
.
constant
(
self
.
_params
.
num_char_classes
-
1
,
shape
=
(
batch_size
,
seq_length
),
dtype
=
tf
.
int64
)
self
.
_params
.
num_char_classes
-
1
,
shape
=
(
batch_size
,
seq_length
),
dtype
=
tf
.
int64
)
known_char
=
tf
.
not_equal
(
chars_labels
,
reject_char
)
weights
=
tf
.
to_floa
t
(
known_char
)
weights
=
tf
.
cas
t
(
known_char
,
dtype
=
tf
.
float32
)
logits_list
=
tf
.
unstack
(
chars_logits
,
axis
=
1
)
weights_list
=
tf
.
unstack
(
weights
,
axis
=
1
)
loss
=
tf
.
contrib
.
legacy_seq2seq
.
sequence_loss
(
logits_list
,
labels_list
,
weights_list
,
softmax_loss_function
=
get_softmax_loss_fn
(
mparams
.
label_smoothing
),
average_across_timesteps
=
mparams
.
average_across_timesteps
)
tf
.
losses
.
add_loss
(
loss
)
logits_list
,
labels_list
,
weights_list
,
softmax_loss_function
=
get_softmax_loss_fn
(
mparams
.
label_smoothing
),
average_across_timesteps
=
mparams
.
average_across_timesteps
)
tf
.
compat
.
v1
.
losses
.
add_loss
(
loss
)
return
loss
def
create_summaries
(
self
,
data
,
endpoints
,
charset
,
is_training
):
...
...
@@ -482,8 +648,8 @@ class Model(object):
Args:
data: InputEndpoints namedtuple.
endpoints: OutputEndpoints namedtuple.
charset: A dictionary with mapping between character codes and
unicode
characters. Use the one provided by a dataset.charset.
charset: A dictionary with mapping between character codes and
unicode
characters. Use the one provided by a dataset.charset.
is_training: If True will create summary prefixes for training job,
otherwise - for evaluation.
...
...
@@ -503,13 +669,14 @@ class Model(object):
# tf.summary.text(sname('text/pr'), pr_text)
# gt_text = charset_mapper.get_text(data.labels[:max_outputs,:])
# tf.summary.text(sname('text/gt'), gt_text)
tf
.
summary
.
image
(
sname
(
'image'
),
data
.
images
,
max_outputs
=
max_outputs
)
tf
.
compat
.
v1
.
summary
.
image
(
sname
(
'image'
),
data
.
images
,
max_outputs
=
max_outputs
)
if
is_training
:
tf
.
summary
.
image
(
sname
(
'image/orig'
),
data
.
images_orig
,
max_outputs
=
max_outputs
)
for
var
in
tf
.
trainable_variables
():
tf
.
summary
.
histogram
(
var
.
op
.
name
,
var
)
tf
.
compat
.
v1
.
summary
.
image
(
sname
(
'image/orig'
),
data
.
images_orig
,
max_outputs
=
max_outputs
)
for
var
in
tf
.
compat
.
v1
.
trainable_variables
():
tf
.
compat
.
v1
.
summary
.
histogram
(
var
.
op
.
name
,
var
)
return
None
else
:
...
...
@@ -520,32 +687,36 @@ class Model(object):
names_to_values
[
name
]
=
value_update_tuple
[
0
]
names_to_updates
[
name
]
=
value_update_tuple
[
1
]
use_metric
(
'CharacterAccuracy'
,
metrics
.
char_accuracy
(
endpoints
.
predicted_chars
,
data
.
labels
,
streaming
=
True
,
rej_char
=
self
.
_params
.
null_code
))
use_metric
(
'CharacterAccuracy'
,
metrics
.
char_accuracy
(
endpoints
.
predicted_chars
,
data
.
labels
,
streaming
=
True
,
rej_char
=
self
.
_params
.
null_code
))
# Sequence accuracy computed by cutting sequence at the first null char
use_metric
(
'SequenceAccuracy'
,
metrics
.
sequence_accuracy
(
endpoints
.
predicted_chars
,
data
.
labels
,
streaming
=
True
,
rej_char
=
self
.
_params
.
null_code
))
use_metric
(
'SequenceAccuracy'
,
metrics
.
sequence_accuracy
(
endpoints
.
predicted_chars
,
data
.
labels
,
streaming
=
True
,
rej_char
=
self
.
_params
.
null_code
))
for
name
,
value
in
names_to_values
.
items
():
summary_name
=
'eval/'
+
name
tf
.
summary
.
scalar
(
summary_name
,
tf
.
Print
(
value
,
[
value
],
summary_name
))
tf
.
compat
.
v1
.
summary
.
scalar
(
summary_name
,
tf
.
compat
.
v1
.
Print
(
value
,
[
value
],
summary_name
))
return
list
(
names_to_updates
.
values
())
def
create_init_fn_to_restore
(
self
,
master_checkpoint
,
def
create_init_fn_to_restore
(
self
,
master_checkpoint
,
inception_checkpoint
=
None
):
"""Creates an init operations to restore weights from various checkpoints.
Args:
master_checkpoint: path to a checkpoint which contains all weights for
the
whole model.
master_checkpoint: path to a checkpoint which contains all weights for
the
whole model.
inception_checkpoint: path to a checkpoint which contains weights for the
inception part only.
...
...
@@ -556,8 +727,8 @@ class Model(object):
all_feed_dict
=
{}
def
assign_from_checkpoint
(
variables
,
checkpoint
):
logging
.
info
(
'Request to re-store %d weights from %s'
,
len
(
variables
),
checkpoint
)
logging
.
info
(
'Request to re-store %d weights from %s'
,
len
(
variables
),
checkpoint
)
if
not
variables
:
logging
.
error
(
'Can
\'
t find any variables to restore.'
)
sys
.
exit
(
1
)
...
...
@@ -565,15 +736,18 @@ class Model(object):
all_assign_ops
.
append
(
assign_op
)
all_feed_dict
.
update
(
feed_dict
)
logging
.
info
(
'variables_to_restore:
\n
%s'
%
utils
.
variables_to_restore
().
keys
())
logging
.
info
(
'moving_average_variables:
\n
%s'
%
[
v
.
op
.
name
for
v
in
tf
.
moving_average_variables
()])
logging
.
info
(
'trainable_variables:
\n
%s'
%
[
v
.
op
.
name
for
v
in
tf
.
trainable_variables
()])
logging
.
info
(
'variables_to_restore:
\n
%s'
,
utils
.
variables_to_restore
().
keys
())
logging
.
info
(
'moving_average_variables:
\n
%s'
,
[
v
.
op
.
name
for
v
in
tf
.
compat
.
v1
.
moving_average_variables
()])
logging
.
info
(
'trainable_variables:
\n
%s'
,
[
v
.
op
.
name
for
v
in
tf
.
compat
.
v1
.
trainable_variables
()])
if
master_checkpoint
:
assign_from_checkpoint
(
utils
.
variables_to_restore
(),
master_checkpoint
)
if
inception_checkpoint
:
variables
=
utils
.
variables_to_restore
(
'AttentionOcr_v1/conv_tower_fn/INCE'
,
strip_scope
=
True
)
'AttentionOcr_v1/conv_tower_fn/INCE'
,
strip_scope
=
True
)
assign_from_checkpoint
(
variables
,
inception_checkpoint
)
def
init_assign_fn
(
sess
):
...
...
research/attention_ocr/python/model_export.py
0 → 100644
View file @
31ca3b97
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r
"""Converts existing checkpoint into a SavedModel.
Usage example:
python model_export.py \
--logtostderr --checkpoint=model.ckpt-399731 \
--export_dir=/tmp/attention_ocr_export
"""
import
os
import
tensorflow
as
tf
from
tensorflow
import
app
from
tensorflow.contrib
import
slim
from
tensorflow.python.platform
import
flags
import
common_flags
import
model_export_lib
FLAGS = flags.FLAGS
common_flags.define()

# Export-specific commandline flags. --dataset_name and --checkpoint are
# defined (and required) by common_flags.
flags.DEFINE_string('export_dir', None, 'Directory to export model files to.')
flags.DEFINE_integer(
    'image_width', None,
    'Image width used during training (or crop width if used)'
    ' If not set, the dataset default is used instead.')
flags.DEFINE_integer(
    'image_height', None,
    'Image height used during training(or crop height if used)'
    ' If not set, the dataset default is used instead.')
flags.DEFINE_string('work_dir', '/tmp', 'A directory to store temporary files.')
flags.DEFINE_integer('version_number', 1, 'Version number of the model')
flags.DEFINE_bool(
    'export_for_serving', True,
    'Whether the exported model accepts serialized tf.Example '
    'protos as input')
def get_checkpoint_path():
  """Resolves the checkpoint path to restore from.

  An explicit --checkpoint flag wins; otherwise the most recent checkpoint
  found under --train_log_dir is used.

  Raises:
    ValueError: if no checkpoint can be located in --train_log_dir.

  Returns:
    A string path to the checkpoint.
  """
  # Explicit flag takes precedence over directory scanning.
  if FLAGS.checkpoint:
    return FLAGS.checkpoint
  latest_path = tf.train.latest_checkpoint(FLAGS.train_log_dir)
  if not latest_path:
    raise ValueError('Can\'t find a checkpoint in: %s' % FLAGS.train_log_dir)
  return latest_path
def export_model(export_dir,
                 export_for_serving,
                 batch_size=None,
                 crop_image_width=None,
                 crop_image_height=None):
  """Exports a model to the named directory.

  Note that --dataset_name and --checkpoint are required and parsed by the
  underlying module common_flags.

  Args:
    export_dir: The output dir where model is exported to.
    export_for_serving: If True, expects a serialized image as input and attach
      image normalization as part of exported graph.
    batch_size: For non-serving export, the input batch_size needs to be
      specified.
    crop_image_width: Width of the input image. Uses the dataset default if
      None.
    crop_image_height: Height of the input image. Uses the dataset default if
      None.

  Raises:
    ValueError: if the dataset's charset file does not exist.

  Returns:
    Returns the model signature_def.
  """
  # Dataset object used only to get all parameters for the model.
  dataset = common_flags.create_dataset(split_name='test')
  model = common_flags.create_model(
      dataset.num_char_classes,
      dataset.max_sequence_length,
      dataset.num_of_views,
      dataset.null_code,
      charset=dataset.charset)
  dataset_image_height, dataset_image_width, image_depth = dataset.image_shape

  # Fail fast if the charmap file is missing; the exported graph's text lookup
  # table depends on it. (BUG FIX: the message previously formatted
  # dataset.charset - the charset mapping - instead of the missing file path.)
  if not os.path.exists(dataset.charset_file):
    raise ValueError('No charset defined at {}: export will fail'.format(
        dataset.charset_file))

  # Default to dataset dimensions, otherwise use provided dimensions.
  image_width = crop_image_width or dataset_image_width
  image_height = crop_image_height or dataset_image_height

  if export_for_serving:
    # Serving input: a batch of serialized tf.Example protos; normalization
    # into [0, 1] floats is baked into the exported graph.
    images_orig = tf.compat.v1.placeholder(
        tf.string, shape=[batch_size], name='tf_example')
    images_orig_float = model_export_lib.generate_tfexample_image(
        images_orig,
        image_height,
        image_width,
        image_depth,
        name='float_images')
  else:
    # Direct-use input: a fixed-shape uint8 image batch, converted to floats.
    images_shape = (batch_size, image_height, image_width, image_depth)
    images_orig = tf.compat.v1.placeholder(
        tf.uint8, shape=images_shape, name='original_image')
    images_orig_float = tf.image.convert_image_dtype(
        images_orig, dtype=tf.float32, name='float_images')

  endpoints = model.create_base(images_orig_float, labels_one_hot=None)

  sess = tf.compat.v1.Session()
  saver = tf.compat.v1.train.Saver(
      slim.get_variables_to_restore(), sharded=True)
  saver.restore(sess, get_checkpoint_path())
  tf.compat.v1.logging.info('Model restored successfully.')

  # Create model signature.
  if export_for_serving:
    input_tensors = {tf.saved_model.CLASSIFY_INPUTS: images_orig}
  else:
    input_tensors = {'images': images_orig}
  signature_inputs = model_export_lib.build_tensor_info(input_tensors)
  # NOTE: Tensors 'image_float' and 'chars_logit' are used by the inference
  # or to compute saliency maps.
  output_tensors = {
      'images_float': images_orig_float,
      'predictions': endpoints.predicted_chars,
      'scores': endpoints.predicted_scores,
      'chars_logit': endpoints.chars_logit,
      'predicted_length': endpoints.predicted_length,
      'predicted_text': endpoints.predicted_text,
      'predicted_conf': endpoints.predicted_conf,
      'normalized_seq_conf': endpoints.normalized_seq_conf
  }
  # Expose each character's attention mask so clients can visualize attention.
  for i, t in enumerate(
      model_export_lib.attention_ocr_attention_masks(
          dataset.max_sequence_length)):
    output_tensors['attention_mask_%d' % i] = t
  signature_outputs = model_export_lib.build_tensor_info(output_tensors)
  signature_def = tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
      signature_inputs, signature_outputs, tf.saved_model.CLASSIFY_METHOD_NAME)

  # Save model. main_op runs tables_initializer so the charset lookup table
  # is populated when the SavedModel is loaded.
  builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_dir)
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.SERVING],
      signature_def_map={
          tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
      },
      main_op=tf.compat.v1.tables_initializer(),
      strip_default_attrs=True)
  builder.save()
  tf.compat.v1.logging.info('Model has been exported to %s' % export_dir)

  return signature_def
def main(unused_argv):
  """Exports the model to FLAGS.export_dir, refusing to overwrite."""
  # Fail fast: SavedModelBuilder requires the export directory to not exist.
  if os.path.exists(FLAGS.export_dir):
    raise ValueError('export_dir already exists: exporting will fail')

  export_model(FLAGS.export_dir, FLAGS.export_for_serving, FLAGS.batch_size,
               FLAGS.image_width, FLAGS.image_height)
if __name__ == '__main__':
  # These flags have no usable defaults, so require them before running.
  flags.mark_flag_as_required('dataset_name')
  flags.mark_flag_as_required('export_dir')
  app.run(main)
research/attention_ocr/python/model_export_lib.py
0 → 100644
View file @
31ca3b97
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for exporting Attention OCR model."""
import
tensorflow
as
tf
# Function borrowed from research/object_detection/core/preprocessor.py
def normalize_image(image, original_minval, original_maxval, target_minval,
                    target_maxval):
  """Normalizes pixel values in the image.

  Moves the pixel values from the current [original_minval, original_maxval]
  range to a the [target_minval, target_maxval] range.

  Args:
    image: rank 3 float32 tensor containing 1 image -> [height, width,
      channels].
    original_minval: current image minimum value.
    original_maxval: current image maximum value.
    target_minval: target image minimum value.
    target_maxval: target image maximum value.

  Raises:
    ValueError: if original_minval equals original_maxval, which would make
      the rescaling factor a division by zero.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.compat.v1.name_scope('NormalizeImage', values=[image]):
    original_minval = float(original_minval)
    original_maxval = float(original_maxval)
    target_minval = float(target_minval)
    target_maxval = float(target_maxval)
    # Guard the denominator: a degenerate source range would otherwise
    # silently produce inf/nan pixels.
    if original_minval == original_maxval:
      raise ValueError(
          'original_minval and original_maxval must differ to normalize.')
    scale = (target_maxval - target_minval) / (original_maxval -
                                               original_minval)
    image = tf.cast(image, dtype=tf.float32)
    image = tf.subtract(image, original_minval)
    image = tf.multiply(image, scale)
    image = tf.add(image, target_minval)
    return image
def generate_tfexample_image(input_example_strings,
                             image_height,
                             image_width,
                             image_channels,
                             name=None):
  """Parses a 1D tensor of serialized tf.Example protos and returns image batch.

  Args:
    input_example_strings: A 1-Dimensional tensor of size [batch_size] and type
      tf.string containing a serialized Example proto per image.
    image_height: First image dimension.
    image_width: Second image dimension.
    image_channels: Third image dimension.
    name: optional tensor name.

  Returns:
    A tensor with shape [batch_size, height, width, channels] of type float32
    with values in the range [0..1]
  """
  # Batch size is only known at run time, so the output shape is assembled
  # dynamically with tf.stack.
  batch_size = tf.shape(input=input_example_strings)[0]
  images_shape = tf.stack(
      [batch_size, image_height, image_width, image_channels])
  tf_example_image_key = 'image/encoded'
  # NOTE(review): despite the 'image/encoded' key name, the feature is parsed
  # as a flat list of exactly height*width*channels float32 pixel values, not
  # as encoded image bytes — callers must serialize images accordingly.
  feature_configs = {
      tf_example_image_key:
          tf.io.FixedLenFeature(
              image_height * image_width * image_channels, dtype=tf.float32)
  }
  feature_tensors = tf.io.parse_example(
      serialized=input_example_strings, features=feature_configs)
  # Rescale raw [0, 255] pixel values into the [0, 1] range, then reshape the
  # flat pixel lists into an image batch.
  float_images = tf.reshape(
      normalize_image(
          feature_tensors[tf_example_image_key],
          original_minval=0.0,
          original_maxval=255.0,
          target_minval=0.0,
          target_maxval=1.0),
      images_shape,
      name=name)
  return float_images
def attention_ocr_attention_masks(num_characters):
  """Looks up the attention softmax tensor for each predicted character.

  Args:
    num_characters: number of characters the decoder predicts; one attention
      mask tensor is returned per character.

  Returns:
    A list of num_characters tensors from the default graph.
  """
  # TODO(gorban): use tensors directly after replacing LSTM unroll methods.
  prefix = ('AttentionOcr_v1/'
            'sequence_logit_fn/SQLR/LSTM/attention_decoder/Attention_0')
  # The first decoder step has no numeric suffix; later steps are '_1', '_2'...
  tensor_names = ['%s/Softmax:0' % (prefix)]
  tensor_names.extend(
      '%s_%d/Softmax:0' % (prefix, step) for step in range(1, num_characters))
  graph = tf.compat.v1.get_default_graph()
  return [graph.get_tensor_by_name(tensor_name)
          for tensor_name in tensor_names]
def build_tensor_info(tensor_dict):
  """Converts each tensor in a dict to a TensorInfo proto, preserving keys."""
  infos = {}
  for key, tensor in tensor_dict.items():
    infos[key] = tf.compat.v1.saved_model.utils.build_tensor_info(tensor)
  return infos
research/attention_ocr/python/model_export_test.py
0 → 100644
View file @
31ca3b97
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for model_export."""
import
os
import
numpy
as
np
from
absl.testing
import
flagsaver
import
tensorflow
as
tf
import
common_flags
import
model_export
# Pre-trained FSNS checkpoint the export tests restore from; download from
# _CHECKPOINT_URL and extract next to this file before running the tests.
_CHECKPOINT = 'model.ckpt-399731'
_CHECKPOINT_URL = (
    'http://download.tensorflow.org/models/attention_ocr_2017_08_09.tar.gz')
def _clean_up():
  """Removes the test temp directory so each test starts from a clean slate."""
  tf.io.gfile.rmtree(tf.compat.v1.test.get_temp_dir())
def _create_tf_example_string(image):
  """Create a serialized tf.Example proto for feeding the model."""
  example = tf.train.Example()
  # The exported serving graph expects the pixels as a flat float list under
  # the 'image/encoded' key.
  flat_pixels = list(np.reshape(image, (-1)))
  example.features.feature['image/encoded'].float_list.value.extend(
      flat_pixels)
  return example.SerializeToString()
class AttentionOcrExportTest(tf.test.TestCase):
  """Tests for model_export.export_model."""

  def setUp(self):
    # Fail early with a download hint if any shard of the pre-trained
    # checkpoint is missing — the tests cannot run without it.
    for suffix in ['.meta', '.index', '.data-00000-of-00001']:
      filename = _CHECKPOINT + suffix
      self.assertTrue(
          tf.io.gfile.exists(filename),
          msg='Missing checkpoint file %s. '
          'Please download and extract it from %s' %
          (filename, _CHECKPOINT_URL))
    # export_model reads these flags via common_flags.
    tf.flags.FLAGS.dataset_name = 'fsns'
    tf.flags.FLAGS.checkpoint = _CHECKPOINT
    tf.flags.FLAGS.dataset_dir = os.path.join(
        os.path.dirname(__file__), 'datasets/testdata/fsns')
    tf.test.TestCase.setUp(self)
    _clean_up()
    self.export_dir = os.path.join(
        tf.compat.v1.test.get_temp_dir(), 'exported_model')
    # The minimal set of output tensors every export mode must expose.
    self.minimal_output_signature = {
        'predictions': 'AttentionOcr_v1/predicted_chars:0',
        'scores': 'AttentionOcr_v1/predicted_scores:0',
        'predicted_length': 'AttentionOcr_v1/predicted_length:0',
        'predicted_text': 'AttentionOcr_v1/predicted_text:0',
        'predicted_conf': 'AttentionOcr_v1/predicted_conf:0',
        'normalized_seq_conf': 'AttentionOcr_v1/normalized_seq_conf:0'
    }

  def create_input_feed(self, graph_def, serving):
    """Returns the input feed for the model.

    Creates random images, according to the size specified by dataset_name,
    format it in the correct way depending on whether the model was exported
    for serving, and return the correctly keyed feed_dict for inference.

    Args:
      graph_def: Graph definition of the loaded model.
      serving: Whether the model was exported for Serving.

    Returns:
      The feed_dict suitable for model inference.
    """
    # Creates a dataset based on FLAGS.dataset_name.
    self.dataset = common_flags.create_dataset('test')
    # Create some random images to test inference for any dataset.
    self.images = {
        'img1':
            np.random.uniform(low=64, high=192,
                              size=self.dataset.image_shape).astype('uint8'),
        'img2':
            np.random.uniform(low=32, high=224,
                              size=self.dataset.image_shape).astype('uint8'),
    }
    signature_def = graph_def.signature_def[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    if serving:
      input_name = signature_def.inputs[tf.saved_model.CLASSIFY_INPUTS].name
      # Model for serving takes input: inputs['inputs'] = 'tf_example:0'
      feed_dict = {
          input_name: [
              _create_tf_example_string(self.images['img1']),
              _create_tf_example_string(self.images['img2'])
          ]
      }
    else:
      input_name = signature_def.inputs['images'].name
      # Model for direct use takes input: inputs['images'] = 'original_image:0'
      feed_dict = {
          input_name: np.stack([self.images['img1'], self.images['img2']])
      }
    return feed_dict

  def verify_export_load_and_inference(self, export_for_serving=False):
    """Verify exported model can be loaded and inference can run successfully.

    This function will load the exported model in self.export_dir, then create
    some fake images according to the specification of FLAGS.dataset_name.
    It then feeds the input through the model, and verify the minimal set of
    output signatures are present.

    Note: Model and dataset creation in the underlying library depends on the
    following commandline flags:
      FLAGS.dataset_name

    Args:
      export_for_serving: True if the model was exported for Serving. This
        affects how input is fed into the model.
    """
    # Start from a fresh graph so the loaded SavedModel does not collide with
    # graph state left over from the export step.
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    graph_def = tf.compat.v1.saved_model.loader.load(
        sess=sess,
        tags=[tf.saved_model.SERVING],
        export_dir=self.export_dir)
    feed_dict = self.create_input_feed(graph_def, export_for_serving)
    results = sess.run(self.minimal_output_signature, feed_dict=feed_dict)
    # Two images were fed, so per-sequence outputs have shape (2,).
    out_shape = (2,)
    self.assertEqual(np.shape(results['predicted_conf']), out_shape)
    self.assertEqual(np.shape(results['predicted_text']), out_shape)
    self.assertEqual(np.shape(results['predicted_length']), out_shape)
    self.assertEqual(np.shape(results['normalized_seq_conf']), out_shape)
    # Per-character outputs carry one value per decoded position.
    out_shape = (2, self.dataset.max_sequence_length)
    self.assertEqual(np.shape(results['scores']), out_shape)
    self.assertEqual(np.shape(results['predictions']), out_shape)

  # flagsaver restores all flag values mutated during the test.
  @flagsaver.flagsaver
  def test_fsns_export_for_serving_and_load_inference(self):
    model_export.export_model(self.export_dir, True)
    self.verify_export_load_and_inference(True)

  @flagsaver.flagsaver
  def test_fsns_export_and_load_inference(self):
    model_export.export_model(self.export_dir, False, batch_size=2)
    self.verify_export_load_and_inference(False)
if __name__ == '__main__':
  tf.test.main()
research/attention_ocr/python/model_test.py
View file @
31ca3b97
...
...
@@ -12,11 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the model."""
import
string
import
numpy
as
np
import
string
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
...
...
@@ -32,6 +31,7 @@ def create_fake_charset(num_char_classes):
class
ModelTest
(
tf
.
test
.
TestCase
):
def
setUp
(
self
):
tf
.
test
.
TestCase
.
setUp
(
self
)
...
...
@@ -51,18 +51,21 @@ class ModelTest(tf.test.TestCase):
self
.
chars_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
)
self
.
length_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
+
1
)
# Placeholder knows image dimensions, but not batch size.
self
.
input_images
=
tf
.
compat
.
v1
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
self
.
image_height
,
self
.
image_width
,
3
),
name
=
'input_node'
)
self
.
initialize_fakes
()
def
initialize_fakes
(
self
):
self
.
images_shape
=
(
self
.
batch_size
,
self
.
image_height
,
self
.
image_width
,
3
)
self
.
fake_images
=
tf
.
constant
(
self
.
rng
.
randint
(
low
=
0
,
high
=
255
,
size
=
self
.
images_shape
).
astype
(
'float32'
),
name
=
'input_node'
)
self
.
fake_conv_tower_np
=
self
.
rng
.
randn
(
*
self
.
conv_tower_shape
).
astype
(
'float32'
)
self
.
fake_images
=
self
.
rng
.
randint
(
low
=
0
,
high
=
255
,
size
=
self
.
images_shape
).
astype
(
'float32'
)
self
.
fake_conv_tower_np
=
self
.
rng
.
randn
(
*
self
.
conv_tower_shape
).
astype
(
'float32'
)
self
.
fake_conv_tower
=
tf
.
constant
(
self
.
fake_conv_tower_np
)
self
.
fake_logits
=
tf
.
constant
(
self
.
rng
.
randn
(
*
self
.
chars_logit_shape
).
astype
(
'float32'
))
...
...
@@ -74,33 +77,44 @@ class ModelTest(tf.test.TestCase):
def
create_model
(
self
,
charset
=
None
):
return
model
.
Model
(
self
.
num_char_classes
,
self
.
seq_length
,
num_views
=
4
,
null_code
=
62
,
self
.
num_char_classes
,
self
.
seq_length
,
num_views
=
4
,
null_code
=
62
,
charset
=
charset
)
def
test_char_related_shapes
(
self
):
ocr_model
=
self
.
create_model
()
charset
=
create_fake_charset
(
self
.
num_char_classes
)
ocr_model
=
self
.
create_model
(
charset
=
charset
)
with
self
.
test_session
()
as
sess
:
endpoints_tf
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
sess
.
run
(
tf
.
global_variables_initializer
())
endpoints
=
sess
.
run
(
endpoints_tf
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_logit
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_log_prob
.
shape
)
images
=
self
.
input_images
,
labels_one_hot
=
None
)
sess
.
run
(
tf
.
compat
.
v1
.
global_variables_initializer
())
tf
.
compat
.
v1
.
tables_initializer
().
run
()
endpoints
=
sess
.
run
(
endpoints_tf
,
feed_dict
=
{
self
.
input_images
:
self
.
fake_images
})
self
.
assertEqual
(
(
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_logit
.
shape
)
self
.
assertEqual
(
(
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_log_prob
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_chars
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_scores
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,),
endpoints
.
predicted_text
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,),
endpoints
.
predicted_conf
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,),
endpoints
.
normalized_seq_conf
.
shape
)
def
test_predicted_scores_are_within_range
(
self
):
ocr_model
=
self
.
create_model
()
_
,
_
,
scores
=
ocr_model
.
char_predictions
(
self
.
fake_logits
)
with
self
.
test_session
()
as
sess
:
scores_np
=
sess
.
run
(
scores
)
scores_np
=
sess
.
run
(
scores
,
feed_dict
=
{
self
.
input_images
:
self
.
fake_images
})
values_in_range
=
(
scores_np
>=
0.0
)
&
(
scores_np
<=
1.0
)
self
.
assertTrue
(
...
...
@@ -111,10 +125,11 @@ class ModelTest(tf.test.TestCase):
def
test_conv_tower_shape
(
self
):
with
self
.
test_session
()
as
sess
:
ocr_model
=
self
.
create_model
()
conv_tower
=
ocr_model
.
conv_tower_fn
(
self
.
fake
_images
)
conv_tower
=
ocr_model
.
conv_tower_fn
(
self
.
input
_images
)
sess
.
run
(
tf
.
global_variables_initializer
())
conv_tower_np
=
sess
.
run
(
conv_tower
)
sess
.
run
(
tf
.
compat
.
v1
.
global_variables_initializer
())
conv_tower_np
=
sess
.
run
(
conv_tower
,
feed_dict
=
{
self
.
input_images
:
self
.
fake_images
})
self
.
assertEqual
(
self
.
conv_tower_shape
,
conv_tower_np
.
shape
)
...
...
@@ -124,11 +139,12 @@ class ModelTest(tf.test.TestCase):
# updates, gradients and variances. It also depends on the type of used
# optimizer.
ocr_model
=
self
.
create_model
()
ocr_model
.
create_base
(
images
=
self
.
fake
_images
,
labels_one_hot
=
None
)
ocr_model
.
create_base
(
images
=
self
.
input
_images
,
labels_one_hot
=
None
)
with
self
.
test_session
()
as
sess
:
tfprof_root
=
tf
.
profiler
.
profile
(
tfprof_root
=
tf
.
compat
.
v1
.
profiler
.
profile
(
sess
.
graph
,
options
=
tf
.
profiler
.
ProfileOptionBuilder
.
trainable_variables_parameter
())
options
=
tf
.
compat
.
v1
.
profiler
.
ProfileOptionBuilder
.
trainable_variables_parameter
())
model_size_bytes
=
4
*
tfprof_root
.
total_parameters
self
.
assertLess
(
model_size_bytes
,
1
*
2
**
30
)
...
...
@@ -147,9 +163,9 @@ class ModelTest(tf.test.TestCase):
summaries
=
ocr_model
.
create_summaries
(
data
,
endpoints
,
charset
,
is_training
=
False
)
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
global_variables_initializer
())
sess
.
run
(
tf
.
local_variables_initializer
())
tf
.
tables_initializer
().
run
()
sess
.
run
(
tf
.
compat
.
v1
.
global_variables_initializer
())
sess
.
run
(
tf
.
compat
.
v1
.
local_variables_initializer
())
tf
.
compat
.
v1
.
tables_initializer
().
run
()
sess
.
run
(
summaries
)
# just check it is runnable
def
test_sequence_loss_function_without_label_smoothing
(
self
):
...
...
@@ -158,7 +174,7 @@ class ModelTest(tf.test.TestCase):
loss
=
model
.
sequence_loss_fn
(
self
.
fake_logits
,
self
.
fake_labels
)
with
self
.
test_session
()
as
sess
:
loss_np
=
sess
.
run
(
loss
)
loss_np
=
sess
.
run
(
loss
,
feed_dict
=
{
self
.
input_images
:
self
.
fake_images
}
)
# This test checks that the loss function is 'runnable'.
self
.
assertEqual
(
loss_np
.
shape
,
tuple
())
...
...
@@ -172,19 +188,21 @@ class ModelTest(tf.test.TestCase):
Returns:
a list of tensors with encoded image coordinates in them.
"""
batch_size
,
h
,
w
,
_
=
net
.
shape
.
as_list
()
batch_size
=
tf
.
shape
(
input
=
net
)[
0
]
_
,
h
,
w
,
_
=
net
.
shape
.
as_list
()
h_loc
=
[
tf
.
tile
(
tf
.
reshape
(
tf
.
contrib
.
layers
.
one_hot_encoding
(
tf
.
constant
([
i
]),
num_classes
=
h
),
[
h
,
1
]),
[
1
,
w
])
for
i
in
range
(
h
)
tf
.
tile
(
tf
.
reshape
(
tf
.
contrib
.
layers
.
one_hot_encoding
(
tf
.
constant
([
i
]),
num_classes
=
h
),
[
h
,
1
]),
[
1
,
w
])
for
i
in
range
(
h
)
]
h_loc
=
tf
.
concat
([
tf
.
expand_dims
(
t
,
2
)
for
t
in
h_loc
],
2
)
w_loc
=
[
tf
.
tile
(
tf
.
contrib
.
layers
.
one_hot_encoding
(
tf
.
constant
([
i
]),
num_classes
=
w
),
[
h
,
1
])
for
i
in
range
(
w
)
tf
.
tile
(
tf
.
contrib
.
layers
.
one_hot_encoding
(
tf
.
constant
([
i
]),
num_classes
=
w
),
[
h
,
1
])
for
i
in
range
(
w
)
]
w_loc
=
tf
.
concat
([
tf
.
expand_dims
(
t
,
2
)
for
t
in
w_loc
],
2
)
loc
=
tf
.
concat
([
h_loc
,
w_loc
],
2
)
...
...
@@ -197,11 +215,12 @@ class ModelTest(tf.test.TestCase):
conv_w_coords_tf
=
model
.
encode_coordinates_fn
(
self
.
fake_conv_tower
)
with
self
.
test_session
()
as
sess
:
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
)
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
,
feed_dict
=
{
self
.
input_images
:
self
.
fake_images
})
batch_size
,
height
,
width
,
feature_size
=
self
.
conv_tower_shape
self
.
assertEqual
(
conv_w_coords
.
shape
,
(
batch_size
,
height
,
width
,
feature_size
+
height
+
width
))
self
.
assertEqual
(
conv_w_coords
.
shape
,
(
batch_size
,
height
,
width
,
feature_size
+
height
+
width
))
def
test_disabled_coordinate_encoding_returns_features_unchanged
(
self
):
model
=
self
.
create_model
()
...
...
@@ -209,7 +228,8 @@ class ModelTest(tf.test.TestCase):
conv_w_coords_tf
=
model
.
encode_coordinates_fn
(
self
.
fake_conv_tower
)
with
self
.
test_session
()
as
sess
:
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
)
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
,
feed_dict
=
{
self
.
input_images
:
self
.
fake_images
})
self
.
assertAllEqual
(
conv_w_coords
,
self
.
fake_conv_tower_np
)
...
...
@@ -221,7 +241,8 @@ class ModelTest(tf.test.TestCase):
conv_w_coords_tf
=
model
.
encode_coordinates_fn
(
fake_conv_tower
)
with
self
.
test_session
()
as
sess
:
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
)
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
,
feed_dict
=
{
self
.
input_images
:
self
.
fake_images
})
# Original features
self
.
assertAllEqual
(
conv_w_coords
[
0
,
:,
:,
:
4
],
...
...
@@ -252,8 +273,8 @@ class ModelTest(tf.test.TestCase):
endpoints_tf
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
sess
.
run
(
tf
.
global_variables_initializer
())
tf
.
tables_initializer
().
run
()
sess
.
run
(
tf
.
compat
.
v1
.
global_variables_initializer
())
tf
.
compat
.
v1
.
tables_initializer
().
run
()
endpoints
=
sess
.
run
(
endpoints_tf
)
self
.
assertEqual
(
endpoints
.
predicted_text
.
shape
,
(
self
.
batch_size
,))
...
...
@@ -261,14 +282,15 @@ class ModelTest(tf.test.TestCase):
class
CharsetMapperTest
(
tf
.
test
.
TestCase
):
def
test_text_corresponds_to_ids
(
self
):
charset
=
create_fake_charset
(
36
)
ids
=
tf
.
constant
(
[[
17
,
14
,
21
,
21
,
24
],
[
32
,
24
,
27
,
21
,
13
]],
dtype
=
tf
.
int64
)
ids
=
tf
.
constant
(
[[
17
,
14
,
21
,
21
,
24
],
[
32
,
24
,
27
,
21
,
13
]],
dtype
=
tf
.
int64
)
charset_mapper
=
model
.
CharsetMapper
(
charset
)
with
self
.
test_session
()
as
sess
:
tf
.
tables_initializer
().
run
()
tf
.
compat
.
v1
.
tables_initializer
().
run
()
text
=
sess
.
run
(
charset_mapper
.
get_text
(
ids
))
self
.
assertAllEqual
(
text
,
[
b
'hello'
,
b
'world'
])
...
...
research/attention_ocr/python/sequence_layers.py
View file @
31ca3b97
...
...
@@ -111,12 +111,12 @@ class SequenceLayerBase(object):
self
.
_mparams
=
method_params
self
.
_net
=
net
self
.
_labels_one_hot
=
labels_one_hot
self
.
_batch_size
=
net
.
get_shape
().
dims
[
0
].
value
self
.
_batch_size
=
tf
.
shape
(
input
=
net
)[
0
]
# Initialize parameters for char logits which will be computed on the fly
# inside an LSTM decoder.
self
.
_char_logits
=
{}
regularizer
=
slim
.
l2_
regularizer
(
self
.
_mparams
.
weight_decay
)
regularizer
=
tf
.
keras
.
regularizer
s
.
l2
(
0.5
*
(
self
.
_mparams
.
weight_decay
)
)
self
.
_softmax_w
=
slim
.
model_variable
(
'softmax_w'
,
[
self
.
_mparams
.
num_lstm_units
,
self
.
_params
.
num_char_classes
],
...
...
@@ -124,7 +124,7 @@ class SequenceLayerBase(object):
regularizer
=
regularizer
)
self
.
_softmax_b
=
slim
.
model_variable
(
'softmax_b'
,
[
self
.
_params
.
num_char_classes
],
initializer
=
tf
.
zeros_initializer
(),
initializer
=
tf
.
compat
.
v1
.
zeros_initializer
(),
regularizer
=
regularizer
)
@
abc
.
abstractmethod
...
...
@@ -203,8 +203,8 @@ class SequenceLayerBase(object):
A tensor with shape [batch_size, num_char_classes]
"""
if
char_index
not
in
self
.
_char_logits
:
self
.
_char_logits
[
char_index
]
=
tf
.
nn
.
xw_plus_b
(
inputs
,
self
.
_softmax_w
,
self
.
_softmax_b
)
self
.
_char_logits
[
char_index
]
=
tf
.
compat
.
v1
.
nn
.
xw_plus_b
(
inputs
,
self
.
_softmax_w
,
self
.
_softmax_b
)
return
self
.
_char_logits
[
char_index
]
def
char_one_hot
(
self
,
logit
):
...
...
@@ -216,7 +216,7 @@ class SequenceLayerBase(object):
Returns:
A tensor with shape [batch_size, num_char_classes]
"""
prediction
=
tf
.
argmax
(
logit
,
axis
=
1
)
prediction
=
tf
.
argmax
(
input
=
logit
,
axis
=
1
)
return
slim
.
one_hot_encoding
(
prediction
,
self
.
_params
.
num_char_classes
)
def
get_input
(
self
,
prev
,
i
):
...
...
@@ -244,10 +244,10 @@ class SequenceLayerBase(object):
Returns:
A tensor with shape [batch_size, seq_length, num_char_classes].
"""
with
tf
.
variable_scope
(
'LSTM'
):
with
tf
.
compat
.
v1
.
variable_scope
(
'LSTM'
):
first_label
=
self
.
get_input
(
prev
=
None
,
i
=
0
)
decoder_inputs
=
[
first_label
]
+
[
None
]
*
(
self
.
_params
.
seq_length
-
1
)
lstm_cell
=
tf
.
co
ntrib
.
rnn
.
LSTMCell
(
lstm_cell
=
tf
.
co
mpat
.
v1
.
nn
.
rnn_cell
.
LSTMCell
(
self
.
_mparams
.
num_lstm_units
,
use_peepholes
=
False
,
cell_clip
=
self
.
_mparams
.
lstm_state_clip_value
,
...
...
@@ -259,9 +259,9 @@ class SequenceLayerBase(object):
loop_function
=
self
.
get_input
,
cell
=
lstm_cell
)
with
tf
.
variable_scope
(
'logits'
):
with
tf
.
compat
.
v1
.
variable_scope
(
'logits'
):
logits_list
=
[
tf
.
expand_dims
(
self
.
char_logit
(
logit
,
i
),
dim
=
1
)
tf
.
expand_dims
(
self
.
char_logit
(
logit
,
i
),
axis
=
1
)
for
i
,
logit
in
enumerate
(
lstm_outputs
)
]
...
...
@@ -275,7 +275,7 @@ class NetSlice(SequenceLayerBase):
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
NetSlice
,
self
).
__init__
(
*
args
,
**
kwargs
)
self
.
_zero_label
=
tf
.
zeros
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
tf
.
stack
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
)
def
get_image_feature
(
self
,
char_index
):
"""Returns a subset of image features for a character.
...
...
@@ -352,7 +352,7 @@ class Attention(SequenceLayerBase):
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
Attention
,
self
).
__init__
(
*
args
,
**
kwargs
)
self
.
_zero_label
=
tf
.
zeros
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
tf
.
stack
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
)
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
...
...
research/attention_ocr/python/sequence_layers_test.py
View file @
31ca3b97
...
...
@@ -29,13 +29,13 @@ import sequence_layers
def
fake_net
(
batch_size
,
num_features
,
feature_size
):
return
tf
.
convert_to_tensor
(
np
.
random
.
uniform
(
size
=
(
batch_size
,
num_features
,
feature_size
)),
value
=
np
.
random
.
uniform
(
size
=
(
batch_size
,
num_features
,
feature_size
)),
dtype
=
tf
.
float32
)
def
fake_labels
(
batch_size
,
seq_length
,
num_char_classes
):
labels_np
=
tf
.
convert_to_tensor
(
np
.
random
.
randint
(
value
=
np
.
random
.
randint
(
low
=
0
,
high
=
num_char_classes
,
size
=
(
batch_size
,
seq_length
)))
return
slim
.
one_hot_encoding
(
labels_np
,
num_classes
=
num_char_classes
)
...
...
research/attention_ocr/python/train.py
View file @
31ca3b97
...
...
@@ -96,16 +96,16 @@ def get_training_hparams():
def
create_optimizer
(
hparams
):
"""Creates optimized based on the specified flags."""
if
hparams
.
optimizer
==
'momentum'
:
optimizer
=
tf
.
train
.
MomentumOptimizer
(
optimizer
=
tf
.
compat
.
v1
.
train
.
MomentumOptimizer
(
hparams
.
learning_rate
,
momentum
=
hparams
.
momentum
)
elif
hparams
.
optimizer
==
'adam'
:
optimizer
=
tf
.
train
.
AdamOptimizer
(
hparams
.
learning_rate
)
optimizer
=
tf
.
compat
.
v1
.
train
.
AdamOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'adadelta'
:
optimizer
=
tf
.
train
.
AdadeltaOptimizer
(
hparams
.
learning_rate
)
optimizer
=
tf
.
compat
.
v1
.
train
.
AdadeltaOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'adagrad'
:
optimizer
=
tf
.
train
.
AdagradOptimizer
(
hparams
.
learning_rate
)
optimizer
=
tf
.
compat
.
v1
.
train
.
AdagradOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'rmsprop'
:
optimizer
=
tf
.
train
.
RMSPropOptimizer
(
optimizer
=
tf
.
compat
.
v1
.
train
.
RMSPropOptimizer
(
hparams
.
learning_rate
,
momentum
=
hparams
.
momentum
)
return
optimizer
...
...
@@ -154,14 +154,14 @@ def train(loss, init_fn, hparams):
def
prepare_training_dir
():
if
not
tf
.
gfile
.
E
xists
(
FLAGS
.
train_log_dir
):
if
not
tf
.
io
.
gfile
.
e
xists
(
FLAGS
.
train_log_dir
):
logging
.
info
(
'Create a new training directory %s'
,
FLAGS
.
train_log_dir
)
tf
.
gfile
.
M
ake
D
irs
(
FLAGS
.
train_log_dir
)
tf
.
io
.
gfile
.
m
ake
d
irs
(
FLAGS
.
train_log_dir
)
else
:
if
FLAGS
.
reset_train_dir
:
logging
.
info
(
'Reset the training directory %s'
,
FLAGS
.
train_log_dir
)
tf
.
gfile
.
DeleteRecursively
(
FLAGS
.
train_log_dir
)
tf
.
gfile
.
M
ake
D
irs
(
FLAGS
.
train_log_dir
)
tf
.
io
.
gfile
.
rmtree
(
FLAGS
.
train_log_dir
)
tf
.
io
.
gfile
.
m
ake
d
irs
(
FLAGS
.
train_log_dir
)
else
:
logging
.
info
(
'Use already existing training directory %s'
,
FLAGS
.
train_log_dir
)
...
...
@@ -169,7 +169,7 @@ def prepare_training_dir():
def
calculate_graph_metrics
():
param_stats
=
model_analyzer
.
print_model_analysis
(
tf
.
get_default_graph
(),
tf
.
compat
.
v1
.
get_default_graph
(),
tfprof_options
=
model_analyzer
.
TRAINABLE_VARS_PARAMS_STAT_OPTIONS
)
return
param_stats
.
total_parameters
...
...
@@ -186,7 +186,7 @@ def main(_):
# If ps_tasks is zero, the local device is used. When using multiple
# (non-local) replicas, the ReplicaDeviceSetter distributes the variables
# across the different devices.
device_setter
=
tf
.
train
.
replica_device_setter
(
device_setter
=
tf
.
compat
.
v1
.
train
.
replica_device_setter
(
FLAGS
.
ps_tasks
,
merge_devices
=
True
)
with
tf
.
device
(
device_setter
):
data
=
data_provider
.
get_data
(
...
...
research/attention_ocr/python/utils.py
View file @
31ca3b97
...
...
@@ -37,16 +37,16 @@ def logits_to_log_prob(logits):
probabilities.
"""
with
tf
.
variable_scope
(
'log_probabilities'
):
with
tf
.
compat
.
v1
.
variable_scope
(
'log_probabilities'
):
reduction_indices
=
len
(
logits
.
shape
.
as_list
())
-
1
max_logits
=
tf
.
reduce_max
(
logits
,
reduction_indice
s
=
reduction_indices
,
keep
_
dims
=
True
)
input_tensor
=
logits
,
axi
s
=
reduction_indices
,
keepdims
=
True
)
safe_logits
=
tf
.
subtract
(
logits
,
max_logits
)
sum_exp
=
tf
.
reduce_sum
(
tf
.
exp
(
safe_logits
),
reduction_indice
s
=
reduction_indices
,
keep
_
dims
=
True
)
log_probs
=
tf
.
subtract
(
safe_logits
,
tf
.
log
(
sum_exp
))
input_tensor
=
tf
.
exp
(
safe_logits
),
axi
s
=
reduction_indices
,
keepdims
=
True
)
log_probs
=
tf
.
subtract
(
safe_logits
,
tf
.
math
.
log
(
sum_exp
))
return
log_probs
...
...
@@ -78,3 +78,20 @@ def variables_to_restore(scope=None, strip_scope=False):
return
variable_map
else
:
return
{
v
.
op
.
name
:
v
for
v
in
slim
.
get_variables_to_restore
()}
def
ConvertAllInputsToTensors
(
func
):
"""A decorator to convert all function's inputs into tensors.
Args:
func: a function to decorate.
Returns:
A decorated function.
"""
def
FuncWrapper
(
*
args
):
tensors
=
[
tf
.
convert_to_tensor
(
value
=
a
)
for
a
in
args
]
return
func
(
*
tensors
)
return
FuncWrapper
research/deeplab/datasets/build_cityscapes_data.py
View file @
31ca3b97
...
...
@@ -113,17 +113,23 @@ def _get_files(data, dataset_split):
Args:
data: String, desired data ('image' or 'label').
dataset_split: String, dataset split ('train', 'val', 'test')
dataset_split: String, dataset split ('train
_fine
', 'val
_fine
', 'test
_fine
')
Returns:
A list of sorted file names or None when getting label for
test set.
"""
if
data
==
'label'
and
dataset_split
==
'test'
:
return
None
if
dataset_split
==
'train_fine'
:
split_dir
=
'train'
elif
dataset_split
==
'val_fine'
:
split_dir
=
'val'
elif
dataset_split
==
'test_fine'
:
split_dir
=
'test'
else
:
raise
RuntimeError
(
"Split {} is not supported"
.
format
(
dataset_split
))
pattern
=
'*%s.%s'
%
(
_POSTFIX_MAP
[
data
],
_DATA_FORMAT_MAP
[
data
])
search_files
=
os
.
path
.
join
(
FLAGS
.
cityscapes_root
,
_FOLDERS_MAP
[
data
],
dataset_
split
,
'*'
,
pattern
)
FLAGS
.
cityscapes_root
,
_FOLDERS_MAP
[
data
],
split
_dir
,
'*'
,
pattern
)
filenames
=
glob
.
glob
(
search_files
)
return
sorted
(
filenames
)
...
...
@@ -132,7 +138,7 @@ def _convert_dataset(dataset_split):
"""Converts the specified dataset split to TFRecord format.
Args:
dataset_split: The dataset split (e.g., train
, val
).
dataset_split: The dataset split (e.g., train
_fine, val_fine
).
Raises:
RuntimeError: If loaded image and label have different shape, or if the
...
...
@@ -142,8 +148,12 @@ def _convert_dataset(dataset_split):
label_files
=
_get_files
(
'label'
,
dataset_split
)
num_images
=
len
(
image_files
)
num_labels
=
len
(
label_files
)
num_per_shard
=
int
(
math
.
ceil
(
num_images
/
_NUM_SHARDS
))
if
num_images
!=
num_labels
:
raise
RuntimeError
(
"The number of images and labels doesn't match: {} {}"
.
format
(
num_images
,
num_labels
))
image_reader
=
build_data
.
ImageReader
(
'png'
,
channels
=
3
)
label_reader
=
build_data
.
ImageReader
(
'png'
,
channels
=
1
)
...
...
@@ -179,8 +189,8 @@ def _convert_dataset(dataset_split):
def
main
(
unused_argv
):
# Only support converting 'train
' and 'val
' sets for now.
for
dataset_split
in
[
'train'
,
'val'
]:
# Only support converting 'train
_fine', 'val_fine' and 'test_fine
' sets for now.
for
dataset_split
in
[
'train
_fine
'
,
'val
_fine'
,
'test_fine
'
]:
_convert_dataset
(
dataset_split
)
...
...
research/deeplab/datasets/convert_cityscapes.sh
View file @
31ca3b97
...
...
@@ -42,6 +42,8 @@ WORK_DIR="."
# Root path for Cityscapes dataset.
CITYSCAPES_ROOT
=
"
${
WORK_DIR
}
/cityscapes"
export
PYTHONPATH
=
"
${
CITYSCAPES_ROOT
}
:
${
PYTHONPATH
}
"
# Create training labels.
python
"
${
CITYSCAPES_ROOT
}
/cityscapesscripts/preparation/createTrainIdLabelImgs.py"
...
...
research/deeplab/deprecated/segmentation_dataset.py
View file @
31ca3b97
...
...
@@ -81,8 +81,8 @@ DatasetDescriptor = collections.namedtuple(
_CITYSCAPES_INFORMATION
=
DatasetDescriptor
(
splits_to_sizes
=
{
'train'
:
2975
,
'val'
:
500
,
'train
_fine
'
:
2975
,
'val
_fine
'
:
500
,
},
num_classes
=
19
,
ignore_label
=
255
,
...
...
research/deeplab/g3doc/cityscapes.md
View file @
31ca3b97
...
...
@@ -43,7 +43,7 @@ A local training job using `xception_65` can be run with the following command:
python deeplab/train.py
\
--logtostderr
\
--training_number_of_steps
=
90000
\
--train_split
=
"train"
\
--train_split
=
"train
_fine
"
\
--model_variant
=
"xception_65"
\
--atrous_rates
=
6
\
--atrous_rates
=
12
\
...
...
@@ -95,7 +95,7 @@ command:
# From tensorflow/models/research/
python deeplab/eval.py
\
--logtostderr
\
--eval_split
=
"val"
\
--eval_split
=
"val
_fine
"
\
--model_variant
=
"xception_65"
\
--atrous_rates
=
6
\
--atrous_rates
=
12
\
...
...
@@ -121,7 +121,7 @@ command:
# From tensorflow/models/research/
python deeplab/vis.py
\
--logtostderr
\
--vis_split
=
"val"
\
--vis_split
=
"val
_fine
"
\
--model_variant
=
"xception_65"
\
--atrous_rates
=
6
\
--atrous_rates
=
12
\
...
...
research/deeplab/g3doc/installation.md
View file @
31ca3b97
...
...
@@ -68,6 +68,6 @@ Quick running the whole code on the PASCAL VOC 2012 dataset:
```
bash
# From tensorflow/models/research/deeplab
sh local_test.sh
ba
sh local_test.sh
```
research/deeplab/local_test.sh
View file @
31ca3b97
...
...
@@ -19,7 +19,7 @@
#
# Usage:
# # From the tensorflow/models/research/deeplab directory.
# sh ./local_test.sh
#
ba
sh ./local_test.sh
#
#
...
...
@@ -42,7 +42,7 @@ python "${WORK_DIR}"/model_test.py
# Go to datasets folder and download PASCAL VOC 2012 segmentation dataset.
DATASET_DIR
=
"datasets"
cd
"
${
WORK_DIR
}
/
${
DATASET_DIR
}
"
sh download_and_convert_voc2012.sh
ba
sh download_and_convert_voc2012.sh
# Go back to original directory.
cd
"
${
CURRENT_DIR
}
"
...
...
research/delf/INSTALL_INSTRUCTIONS.md
View file @
31ca3b97
## DELF installation
### Installation script
We now have a script to do the entire installation in one shot. Navigate to the
directory
`models/research/delf/delf/python/training`
, then run:
```
bash
# From models/research/delf/delf/python/training
bash install_delf.sh
```
If this works, you are done! If not, see below for detailed instructions for
installing this codebase and its dependencies.
*
Please note that this installation script only works on 64 bits Linux
architectures due to the
`protoc`
binary that is automatically downloaded. If
you wish to install the DELF library on other architectures please update the
[
`install_delf.sh`
](
delf/python/training/install_delf.sh
)
script by referencing
the desired
`protoc`
[
binary release
](
https://github.com/protocolbuffers/protobuf/releases
)
.
*
In more detail: the
`install_delf.sh`
script installs both the DELF library and
its dependencies in the following sequence:
*
Install TensorFlow 2.2 and TensorFlow 2.2 for GPU.
*
Install the
[
TF-Slim
](
https://github.com/google-research/tf-slim
)
library
from source.
*
Download
[
protoc
](
https://github.com/protocolbuffers/protobuf
)
and compile
the DELF Protocol Buffers.
*
Install the matplotlib, numpy, scikit-image, scipy and python3-tk Python
libraries.
*
Install the
[
TensorFlow Object Detection API
](
https://github.com/tensorflow/models/tree/master/research/object_detection
)
from the cloned TensorFlow Model Garden repository.
*
Install the DELF package.
### Tensorflow
[

](https://github.com/tensorflow/tensorflow/releases/tag/v2.
1
.0)
[

](https://github.com/tensorflow/tensorflow/releases/tag/v2.
2
.0)
[

](https://www.python.org/downloads/release/python-360/)
For detailed steps to install Tensorflow, follow the
...
...
@@ -11,9 +46,9 @@ typical user can install Tensorflow using one of the following commands:
```
bash
# For CPU:
pip3
install
'tensorflow'
pip3
install
'tensorflow
>=2.2.0
'
# For GPU:
pip3
install
'tensorflow-gpu'
pip3
install
'tensorflow-gpu
>=2.2.0
'
```
### TF-Slim
...
...
research/delf/README.md
View file @
31ca3b97
# Deep Local and Global Image Features
[

](https://github.com/tensorflow/tensorflow/releases/tag/v2.
1
.0)
[

](https://github.com/tensorflow/tensorflow/releases/tag/v2.
2
.0)
[

](https://www.python.org/downloads/release/python-360/)
This project presents code for deep local and global image feature methods,
...
...
@@ -41,7 +41,7 @@ DELG:
```
"Unifying Deep Local and Global Features for Image Search",
B. Cao*, A. Araujo* and J. Sim,
arxiv:2001.05027
Proc. ECCV'20
```
GLDv2:
...
...
@@ -55,11 +55,11 @@ Proc. CVPR'20
## News
-
[Jul'20] Check out our ECCV'20 paper:
[
"Unifying Deep Local and Global Features for Image Search"
](
https://arxiv.org/abs/2001.05027
)
-
[Apr'20] Check out our CVPR'20 paper:
[
"Google Landmarks Dataset v2 - A
Large-Scale Benchmark for Instance-Level Recognition and
Retrieval"
](
https://arxiv.org/abs/2004.01804
)
-
[Jan'20] Check out our new paper:
[
"Unifying Deep Local and Global Features for Image Search"
](
https://arxiv.org/abs/2001.05027
)
-
[Jun'19] DELF achieved 2nd place in
[
CVPR Visual Localization challenge (Local Features track)
](
https://sites.google.com/corp/view/ltvl2019
)
.
See our slides
...
...
@@ -182,104 +182,55 @@ directories therein, `protos` and `python`.
### `delf/protos`
This directory contains protobufs:
-
`aggregation_config.proto`
: protobuf for configuring local feature
aggregation.
-
`box.proto`
: protobuf for serializing detected boxes.
-
`datum.proto`
: general-purpose protobuf for serializing float tensors.
-
`delf_config.proto`
: protobuf for configuring DELF/DELG extraction.
-
`feature.proto`
: protobuf for serializing DELF features.
This directory contains protobufs for local feature aggregation
(
`aggregation_config.proto`
), serializing detected boxes (
`box.proto`
),
serializing float tensors (
`datum.proto`
), configuring DELF/DELG extraction
(
`delf_config.proto`
), serializing local features (
`feature.proto`
).
### `delf/python`
This directory contains files for several different purposes:
-
`box_io.py`
,
`datum_io.py`
,
`feature_io.py`
are helper files for reading and
writing tensors and features.
-
`delf_v1.py`
contains code to create DELF models.
-
`feature_aggregation_extractor.py`
contains a module to perform local
feature aggregation.
-
`feature_aggregation_similarity.py`
contains a module to perform similarity
computation for aggregated local features.
-
`feature_extractor.py`
contains the code to extract features using DELF.
This is particularly useful for extracting features over multiple scales,
with keypoint selection based on attention scores, and PCA/whitening
post-processing.
The subdirectory
`delf/python/examples`
contains sample scripts to run DELF
feature extraction/matching, and object detection:
-
`delf_config_example.pbtxt`
shows an example instantiation of the DelfConfig
proto, used for DELF feature extraction.
-
`detector.py`
is a module to construct an object detector function.
-
`extract_boxes.py`
enables object detection from a list of images.
-
`extract_features.py`
enables DELF extraction from a list of images.
-
`extractor.py`
is a module to construct a DELF/DELG local feature extraction
function.
-
`match_images.py`
supports image matching using DELF features extracted
using
`extract_features.py`
.
This directory contains files for several different purposes, such as:
reading/writing tensors/features (
`box_io.py`
,
`datum_io.py`
,
`feature_io.py`
),
local feature aggregation extraction and similarity computation
(
`feature_aggregation_extractor.py`
,
`feature_aggregation_similarity.py`
) and
helper functions for image/feature loading/processing (
`utils.py`
,
`feature_extractor.py`
).
The subdirectory
`delf/python/delg`
contains sample scripts/configs related to
the DELG paper:
The subdirectory
`delf/python/examples`
contains sample scripts to run DELF/DELG
feature extraction/matching (
`extractor.py`
,
`extract_features.py`
,
`match_images.py`
) and object detection (
`detector.py`
,
`extract_boxes.py`
).
`delf_config_example.pbtxt`
shows an example instantiation of the DelfConfig
proto, used for DELF feature extraction.
-
`delg_gld_config.pbtxt`
gives the DelfConfig used in DELG paper.
-
`extract_features.py`
for local+global feature extraction on Revisited
datasets.
-
`perform_retrieval.py`
for performing retrieval/evaluating methods on
Revisited datasets.
The subdirectory
`delf/python/delg`
contains sample scripts/configs related to
the DELG paper:
`extract_features.py`
for local+global feature extraction (with
and example
`delg_gld_config.pbtxt`
) and
`perform_retrieval.py`
for performing
retrieval/scoring.
The subdirectory
`delf/python/detect_to_retrieve`
contains sample
scripts/configs related to the Detect-to-Retrieve paper:
-
`aggregation_extraction.py`
is a library to extract/save feature
aggregation.
-
`boxes_and_features_extraction.py`
is a library to extract/save boxes and
DELF features.
-
`cluster_delf_features.py`
for local feature clustering.
-
`dataset.py`
for parsing/evaluating results on Revisited Oxford/Paris
datasets.
-
`delf_gld_config.pbtxt`
gives the DelfConfig used in Detect-to-Retrieve
paper.
-
`extract_aggregation.py`
for aggregated local feature extraction.
-
`extract_index_boxes_and_features.py`
for index image local feature
extraction / bounding box detection on Revisited datasets.
-
`extract_query_features.py`
for query image local feature extraction on
Revisited datasets.
-
`image_reranking.py`
is a module to re-rank images with geometric
verification.
-
`perform_retrieval.py`
for performing retrieval/evaluating methods using
aggregated local features on Revisited datasets.
-
`index_aggregation_config.pbtxt`
,
`query_aggregation_config.pbtxt`
give
AggregationConfig's for Detect-to-Retrieve experiments.
scripts/configs related to the Detect-to-Retrieve paper, for feature/box
extraction/aggregation/clustering (
`aggregation_extraction.py`
,
`boxes_and_features_extraction.py`
,
`cluster_delf_features.py`
,
`extract_aggregation.py`
,
`extract_index_boxes_and_features.py`
,
`extract_query_features.py`
), image retrieval/reranking (
`perform_retrieval.py`
,
`image_reranking.py`
), along with configs used for feature
extraction/aggregation (
`delf_gld_config.pbtxt`
,
`index_aggregation_config.pbtxt`
,
`query_aggregation_config.pbtxt`
) and
Revisited Oxford/Paris dataset parsing/evaluation (
`dataset.py`
).
The subdirectory
`delf/python/google_landmarks_dataset`
contains sample
scripts/modules for computing GLD metrics / reproducing results from the GLDv2
paper:
-
`compute_recognition_metrics.py`
performs recognition metric computation
given input predictions and solution files.
-
`compute_retrieval_metrics.py`
performs retrieval metric computation given
input predictions and solution files.
-
`dataset_file_io.py`
is a module for dataset-related file IO.
-
`metrics.py`
is a module for GLD metric computation.
-
`rn101_af_gldv2clean_config.pbtxt`
gives the DelfConfig used in the
ResNet101-ArcFace (trained on GLDv2-train-clean) baseline used in the GLDv2
paper.
scripts/modules for computing GLD metrics (
`metrics.py`
,
`compute_recognition_metrics.py`
,
`compute_retrieval_metrics.py`
), GLD file IO
(
`dataset_file_io.py`
) / reproducing results from the GLDv2 paper
(
`rn101_af_gldv2clean_config.pbtxt`
and the instructions therein).
The subdirectory
`delf/python/training`
contains sample scripts/modules for
performing DELF training:
-
`datasets/googlelandmarks.py`
is the dataset module used for training.
-
`model/delf_model.py`
is the model module used for training.
-
`model/export_model.py`
is a script for exporting trained models in the
format used by the inference code.
-
`model/export_model_utils.py`
is a module with utilities for model
exporting.
-
`model/resnet50.py`
is a module with a backbone RN50 implementation.
-
`build_image_dataset.py`
converts downloaded dataset into TFRecords format
for training.
-
`train.py`
is the main training script.
performing model training (
`train.py`
) based on a ResNet50 DELF model
(
`model/resnet50.py`
,
`model/delf_model.py`
), also presenting relevant model
exporting scripts and associated utils (
`model/export_model.py`
,
`model/export_global_model.py`
,
`model/export_model_utils.py`
) and dataset
downloading/preprocessing (
`download_dataset.sh`
,
`build_image_dataset.py`
,
`datasets/googlelandmarks.py`
).
Besides these, other files in the different subdirectories contain tests for the
various modules.
...
...
@@ -290,6 +241,16 @@ André Araujo (@andrefaraujo)
## Release history
### Jul, 2020
-
Full TF2 support. Only one minor
`compat.v1`
usage left. Updated
instructions to require TF2.2
-
Refactored / much improved training code, with very detailed, step-by-step
instructions
**Thanks to contributors**
: Dan Anghel, Barbara Fusinska and Andr
é
Araujo.
### May, 2020
-
Codebase is now Python3-first
...
...
research/delf/delf/protos/delf_config.proto
View file @
31ca3b97
...
...
@@ -86,6 +86,9 @@ message DelfConfig {
// Path to DELF model.
optional
string
model_path
=
1
;
// Required.
// Whether model has been exported using TF version 2+.
optional
bool
is_tf2_exported
=
10
[
default
=
false
];
// Image scales to be used.
repeated
float
image_scales
=
2
;
...
...
research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py
View file @
31ca3b97
...
...
@@ -131,7 +131,7 @@ def main(argv):
delf_dataset
=
tf
.
data
.
Dataset
.
from_tensor_slices
((
features_placeholder
))
delf_dataset
=
delf_dataset
.
shuffle
(
1000
).
batch
(
features_for_clustering
.
shape
[
0
])
iterator
=
delf_
data
set
.
make_initializable_iterator
()
iterator
=
tf
.
compat
.
v1
.
data
.
make_initializable_iterator
(
delf_dataset
)
def
_initializer_fn
(
sess
):
"""Initialize dataset iterator, feed in the data."""
...
...
research/delf/delf/python/examples/extractor.py
View file @
31ca3b97
...
...
@@ -102,7 +102,15 @@ def MakeExtractor(config):
Returns:
Function that receives an image and returns features.
Raises:
ValueError: if config is invalid.
"""
# Assert the configuration
if
config
.
use_global_features
and
hasattr
(
config
,
'is_tf2_exported'
)
and
config
.
is_tf2_exported
:
raise
ValueError
(
'use_global_features is incompatible with is_tf2_exported'
)
# Load model.
model
=
tf
.
saved_model
.
load
(
config
.
model_path
)
...
...
@@ -178,7 +186,8 @@ def MakeExtractor(config):
else
:
global_pca_parameters
[
'variances'
]
=
None
model
=
model
.
prune
(
feeds
=
feeds
,
fetches
=
fetches
)
if
not
hasattr
(
config
,
'is_tf2_exported'
)
or
not
config
.
is_tf2_exported
:
model
=
model
.
prune
(
feeds
=
feeds
,
fetches
=
fetches
)
def
ExtractorFn
(
image
,
resize_factor
=
1.0
):
"""Receives an image and returns DELF global and/or local features.
...
...
@@ -197,7 +206,6 @@ def MakeExtractor(config):
features (key 'local_features' mapping to a dict with keys 'locations',
'descriptors', 'scales', 'attention').
"""
resized_image
,
scale_factors
=
ResizeImage
(
image
,
config
,
resize_factor
=
resize_factor
)
...
...
@@ -224,8 +232,20 @@ def MakeExtractor(config):
output
=
None
if
config
.
use_local_features
:
output
=
model
(
image_tensor
,
image_scales_tensor
,
score_threshold_tensor
,
max_feature_num_tensor
)
if
hasattr
(
config
,
'is_tf2_exported'
)
and
config
.
is_tf2_exported
:
predict
=
model
.
signatures
[
'serving_default'
]
output_dict
=
predict
(
input_image
=
image_tensor
,
input_scales
=
image_scales_tensor
,
input_max_feature_num
=
max_feature_num_tensor
,
input_abs_thres
=
score_threshold_tensor
)
output
=
[
output_dict
[
'boxes'
],
output_dict
[
'features'
],
output_dict
[
'scales'
],
output_dict
[
'scores'
]
]
else
:
output
=
model
(
image_tensor
,
image_scales_tensor
,
score_threshold_tensor
,
max_feature_num_tensor
)
else
:
output
=
model
(
image_tensor
,
image_scales_tensor
)
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment