ModelZoo / ResNet50_tensorflow · Commits

Commit 7c732da7, authored Aug 12, 2019 by Nimit Nigania

    Merge remote-tracking branch 'upstream/master'

Parents: cb8ce606, e36934b3

Changes: 25. This page shows 20 changed files with 1120 additions and 332 deletions (+1120 -332).
Changed files:

* official/bert/benchmark/bert_squad_benchmark.py (+20 -7)
* official/bert/modeling.py (+15 -0)
* official/bert/run_classifier.py (+1 -1)
* official/bert/run_pretraining.py (+1 -1)
* official/bert/run_squad.py (+1 -1)
* official/recommendation/data_pipeline.py (+64 -50)
* official/recommendation/data_test.py (+14 -6)
* official/recommendation/ncf_common.py (+6 -16)
* official/recommendation/ncf_estimator_main.py (+1 -1)
* official/recommendation/ncf_input_pipeline.py (+14 -28)
* official/recommendation/ncf_keras_main.py (+231 -192)
* official/recommendation/ncf_test.py (+1 -1)
* official/recommendation/neumf_model.py (+17 -19)
* official/utils/misc/distribution_utils.py (+21 -6)
* official/utils/misc/tpu_lib.py (+5 -0)
* research/lstm_object_detection/README.md (+5 -0)
* research/lstm_object_detection/configs/lstm_ssd_interleaved_mobilenet_v2_imagenet.config (+239 -0)
* research/lstm_object_detection/eval.py (+1 -3)
* research/lstm_object_detection/export_tflite_lstd_graph.py (+134 -0)
* research/lstm_object_detection/export_tflite_lstd_graph_lib.py (+329 -0)
official/bert/benchmark/bert_squad_benchmark.py (view file @ 7c732da7)

@@ -152,7 +152,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark()

@@ -174,7 +174,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark(use_ds=False)

@@ -185,7 +185,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_eager_no_dist_strat_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark(use_ds=False, run_eagerly=True)

@@ -195,7 +195,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 2
     FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
-    FLAGS.train_batch_size = 8
+    FLAGS.train_batch_size = 6
     self._run_and_report_benchmark()

@@ -205,7 +205,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 4
     FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
-    FLAGS.train_batch_size = 16
+    FLAGS.train_batch_size = 12
     self._run_and_report_benchmark()

@@ -215,7 +215,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
-    FLAGS.train_batch_size = 32
+    FLAGS.train_batch_size = 24
     self._run_and_report_benchmark()

@@ -231,6 +231,19 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_xla_fp16(self):
+    """Tests BERT SQuAD model performance with 1 GPU with XLA and FP16."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad_fp16')
+    FLAGS.train_batch_size = 4
+    FLAGS.enable_xla = True
+    FLAGS.dtype = 'fp16'
+    FLAGS.loss_scale = 'dynamic'
+    self._run_and_report_benchmark()
+
   def benchmark_2_gpu_fp16(self):
     """Tests BERT SQuAD model performance with 2 GPUs and FP16."""

@@ -324,7 +337,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
-    FLAGS.train_batch_size = 32
+    FLAGS.train_batch_size = 24
     self._run_and_report_benchmark()
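The hunks above follow one pattern: the per-GPU SQuAD train batch size drops from 4 to 3, and the multi-GPU benchmarks scale it linearly with the GPU count. A minimal sketch of that arithmetic (the helper name is hypothetical, not part of the benchmark code):

    # Hypothetical helper illustrating how the new batch sizes in this diff scale.
    def squad_train_batch_size(num_gpus, per_gpu_batch_size=3):
        """Global batch size used by the benchmark_{n}_gpu_squad methods above."""
        return per_gpu_batch_size * num_gpus

    assert squad_train_batch_size(1) == 3
    assert squad_train_batch_size(2) == 6
    assert squad_train_batch_size(4) == 12
    assert squad_train_batch_size(8) == 24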
official/bert/modeling.py (view file @ 7c732da7)

@@ -276,6 +276,7 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
                max_position_embeddings=512,
                dropout_prob=0.0,
                initializer_range=0.02,
+               initializer=None,
                **kwargs):
     super(EmbeddingPostprocessor, self).__init__(**kwargs)
     self.use_type_embeddings = use_type_embeddings

@@ -285,6 +286,11 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
     self.dropout_prob = dropout_prob
     self.initializer_range = initializer_range

+    if not initializer:
+      self.initializer = get_initializer(self.initializer_range)
+    else:
+      self.initializer = initializer
+
     if self.use_type_embeddings and not self.token_type_vocab_size:
       raise ValueError("If `use_type_embeddings` is True, then "
                        "`token_type_vocab_size` must be specified.")

@@ -723,6 +729,15 @@ class TransformerBlock(tf.keras.layers.Layer):
         name="output_layer_norm", axis=-1, epsilon=1e-12)
     super(TransformerBlock, self).build(unused_input_shapes)

+  def common_layers(self):
+    """Explicitly gets all layer objects inside a Transformer encoder block."""
+    return [
+        self.attention_layer, self.attention_output_dense,
+        self.attention_dropout, self.attention_layer_norm,
+        self.intermediate_dense, self.output_dense, self.output_dropout,
+        self.output_layer_norm
+    ]
+
   def __call__(self, input_tensor, attention_mask=None):
     inputs = pack_inputs([input_tensor, attention_mask])
     return super(TransformerBlock, self).__call__(inputs)
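The new common_layers() helper makes the block's sublayers reachable without relying on Keras auto-tracking. A minimal, self-contained sketch of the same pattern (a toy layer, not the actual TransformerBlock):

    import tensorflow as tf

    class ToyBlock(tf.keras.layers.Layer):
      """Toy layer that, like TransformerBlock above, lists its sublayers."""

      def __init__(self, **kwargs):
        super(ToyBlock, self).__init__(**kwargs)
        self.dense_in = tf.keras.layers.Dense(8)
        self.dense_out = tf.keras.layers.Dense(4)

      def common_layers(self):
        """Explicitly gets all layer objects inside the block."""
        return [self.dense_in, self.dense_out]

      def call(self, inputs):
        return self.dense_out(self.dense_in(inputs))

    block = ToyBlock()
    block(tf.zeros([2, 8]))  # Build the layer so the sublayers have weights.
    print([layer.name for layer in block.common_layers()])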
official/bert/run_classifier.py (view file @ 7c732da7)

@@ -35,8 +35,8 @@ from official.bert import model_saving_utils
 from official.bert import model_training_utils
 from official.bert import modeling
 from official.bert import optimization
-from official.bert import tpu_lib
 from official.utils.misc import keras_utils
+from official.utils.misc import tpu_lib

 flags.DEFINE_enum('mode', 'train_and_eval', ['train_and_eval', 'export_only'],
official/bert/run_pretraining.py (view file @ 7c732da7)

@@ -33,7 +33,7 @@ from official.bert import model_saving_utils
 from official.bert import model_training_utils
 from official.bert import modeling
 from official.bert import optimization
-from official.bert import tpu_lib
+from official.utils.misc import tpu_lib

 flags.DEFINE_string('input_files', None,
                     'File path to retrieve training data for pre-training.')
official/bert/run_squad.py (view file @ 7c732da7)

@@ -36,8 +36,8 @@ from official.bert import modeling
 from official.bert import optimization
 from official.bert import squad_lib
 from official.bert import tokenization
-from official.bert import tpu_lib
 from official.utils.misc import keras_utils
+from official.utils.misc import tpu_lib

 flags.DEFINE_bool('do_train', False, 'Whether to run training.')
 flags.DEFINE_bool('do_predict', False, 'Whether to run eval on the dev set.')
official/recommendation/data_pipeline.py (view file @ 7c732da7)

@@ -143,37 +143,32 @@ class DatasetManager(object):
     if is_training:
       return {
           movielens.USER_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           movielens.ITEM_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           rconst.VALID_POINT_MASK:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           "labels":
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64)
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64)
       }
     else:
       return {
           movielens.USER_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           movielens.ITEM_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           rconst.DUPLICATE_MASK:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64)
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64)
       }

     features = tf.io.parse_single_example(
         serialized_data, _get_feature_map(batch_size, is_training=is_training))
-    users = tf.reshape(tf.cast(
-        features[movielens.USER_COLUMN], rconst.USER_DTYPE), (batch_size,))
-    items = tf.reshape(tf.cast(
-        features[movielens.ITEM_COLUMN], rconst.ITEM_DTYPE), (batch_size,))
+    users = tf.cast(features[movielens.USER_COLUMN], rconst.USER_DTYPE)
+    items = tf.cast(features[movielens.ITEM_COLUMN], rconst.ITEM_DTYPE)

     if is_training:
-      valid_point_mask = tf.reshape(tf.cast(
-          features[movielens.ITEM_COLUMN], tf.bool), (batch_size,))
-      fake_dup_mask = tf.zeros_like(features[movielens.USER_COLUMN])
+      valid_point_mask = tf.cast(features[rconst.VALID_POINT_MASK], tf.bool)
+      fake_dup_mask = tf.zeros_like(users)
       return {
           movielens.USER_COLUMN: users,
           movielens.ITEM_COLUMN: items,

@@ -184,20 +179,15 @@ class DatasetManager(object):
           rconst.DUPLICATE_MASK: fake_dup_mask
       }
     else:
-      labels = tf.reshape(tf.cast(
-          tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool),
-          (batch_size, 1))
-      fake_valid_pt_mask = tf.cast(
-          tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
+      labels = tf.cast(tf.zeros_like(users), tf.bool)
+      fake_valid_pt_mask = tf.cast(tf.zeros_like(users), tf.bool)
       return {
           movielens.USER_COLUMN: users,
           movielens.ITEM_COLUMN: items,
           rconst.DUPLICATE_MASK:
-              tf.reshape(tf.cast(
-                  features[rconst.DUPLICATE_MASK], tf.bool), (batch_size,)),
+              tf.cast(features[rconst.DUPLICATE_MASK], tf.bool),
           rconst.VALID_POINT_MASK: fake_valid_pt_mask,
           rconst.TRAIN_LABEL_KEY:

@@ -221,8 +211,8 @@ class DatasetManager(object):
     if self._is_training:
       mask_start_index = data.pop(rconst.MASK_START_INDEX)
       batch_size = data[movielens.ITEM_COLUMN].shape[0]
-      data[rconst.VALID_POINT_MASK] = np.less(
-          np.arange(batch_size), mask_start_index)
+      data[rconst.VALID_POINT_MASK] = np.expand_dims(
+          np.less(np.arange(batch_size), mask_start_index), -1)

     if self._stream_files:
       example_bytes = self.serialize(data)

@@ -313,19 +303,21 @@ class DatasetManager(object):
     else:
       types = {movielens.USER_COLUMN: rconst.USER_DTYPE,
               movielens.ITEM_COLUMN: rconst.ITEM_DTYPE}
-      shapes = {movielens.USER_COLUMN: tf.TensorShape([batch_size]),
-                movielens.ITEM_COLUMN: tf.TensorShape([batch_size])}
+      shapes = {
+          movielens.USER_COLUMN: tf.TensorShape([batch_size, 1]),
+          movielens.ITEM_COLUMN: tf.TensorShape([batch_size, 1])
+      }

       if self._is_training:
         types[rconst.VALID_POINT_MASK] = np.bool
-        shapes[rconst.VALID_POINT_MASK] = tf.TensorShape([batch_size])
+        shapes[rconst.VALID_POINT_MASK] = tf.TensorShape([batch_size, 1])

         types = (types, np.bool)
-        shapes = (shapes, tf.TensorShape([batch_size]))
+        shapes = (shapes, tf.TensorShape([batch_size, 1]))

       else:
         types[rconst.DUPLICATE_MASK] = np.bool
-        shapes[rconst.DUPLICATE_MASK] = tf.TensorShape([batch_size])
+        shapes[rconst.DUPLICATE_MASK] = tf.TensorShape([batch_size, 1])

     data_generator = functools.partial(
         self.data_generator, epochs_between_evals=epochs_between_evals)

@@ -554,12 +546,17 @@ class BaseDataConstructor(threading.Thread):
       items = np.concatenate([items, item_pad])
       labels = np.concatenate([labels, label_pad])

-    self._train_dataset.put(i, {
-        movielens.USER_COLUMN: users,
-        movielens.ITEM_COLUMN: items,
-        rconst.MASK_START_INDEX: np.array(mask_start_index, dtype=np.int32),
-        "labels": labels,
-    })
+    self._train_dataset.put(
+        i, {
+            movielens.USER_COLUMN:
+                np.reshape(users, (self.train_batch_size, 1)),
+            movielens.ITEM_COLUMN:
+                np.reshape(items, (self.train_batch_size, 1)),
+            rconst.MASK_START_INDEX:
+                np.array(mask_start_index, dtype=np.int32),
+            "labels":
+                np.reshape(labels, (self.train_batch_size, 1)),
+        })

   def _wait_to_construct_train_epoch(self):
     count = 0

@@ -649,11 +646,15 @@ class BaseDataConstructor(threading.Thread):
     users, items, duplicate_mask = self._assemble_eval_batch(
         users, positive_items, negative_items, self._eval_users_per_batch)

-    self._eval_dataset.put(i, {
-        movielens.USER_COLUMN: users.flatten(),
-        movielens.ITEM_COLUMN: items.flatten(),
-        rconst.DUPLICATE_MASK: duplicate_mask.flatten(),
-    })
+    self._eval_dataset.put(
+        i, {
+            movielens.USER_COLUMN:
+                np.reshape(users.flatten(), (self.eval_batch_size, 1)),
+            movielens.ITEM_COLUMN:
+                np.reshape(items.flatten(), (self.eval_batch_size, 1)),
+            rconst.DUPLICATE_MASK:
+                np.reshape(duplicate_mask.flatten(), (self.eval_batch_size, 1)),
+        })

   def _construct_eval_epoch(self):
     """Loop to construct data for evaluation."""

@@ -720,24 +721,37 @@ class DummyConstructor(threading.Thread):
       num_users = params["num_users"]
       num_items = params["num_items"]

-      users = tf.random.uniform([batch_size], dtype=tf.int32,
-                                minval=0, maxval=num_users)
-      items = tf.random.uniform([batch_size], dtype=tf.int32,
-                                minval=0, maxval=num_items)
+      users = tf.random.uniform([batch_size, 1], dtype=tf.int32,
+                                minval=0, maxval=num_users)
+      items = tf.random.uniform([batch_size, 1], dtype=tf.int32,
+                                minval=0, maxval=num_items)

       if is_training:
-        valid_point_mask = tf.cast(tf.random.uniform(
-            [batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
-        labels = tf.cast(tf.random.uniform(
-            [batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
+        valid_point_mask = tf.cast(tf.random.uniform(
+            [batch_size, 1], dtype=tf.int32, minval=0, maxval=2), tf.bool)
+        labels = tf.cast(tf.random.uniform(
+            [batch_size, 1], dtype=tf.int32, minval=0, maxval=2), tf.bool)
         data = {
             movielens.USER_COLUMN: users,
             movielens.ITEM_COLUMN: items,
            rconst.VALID_POINT_MASK: valid_point_mask,
        }, labels
       else:
-        dupe_mask = tf.cast(tf.random.uniform(
-            [batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
+        dupe_mask = tf.cast(tf.random.uniform(
+            [batch_size, 1], dtype=tf.int32, minval=0, maxval=2), tf.bool)
         data = {
             movielens.USER_COLUMN: users,
             movielens.ITEM_COLUMN: items,
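The recurring change in this file is that every per-example feature now carries an explicit trailing dimension of 1 ([batch_size, 1] instead of [batch_size]), the layout consumed by the Keras NCF input pipeline. A small NumPy sketch of the reshaping done in the put() calls above:

    import numpy as np

    batch_size = 4
    users = np.array([7, 7, 9, 11], dtype=np.int32)    # shape (4,)
    users_2d = np.reshape(users, (batch_size, 1))       # shape (4, 1)
    assert users_2d.shape == (batch_size, 1)
    # The underlying bytes are unchanged; only the logical shape differs.
    assert users.tobytes() == users_2d.tobytes()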
official/recommendation/data_test.py (view file @ 7c732da7)

@@ -168,8 +168,11 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features, labels in first_epoch:
-      data_list = [features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-                   features[rconst.VALID_POINT_MASK], labels]
+      data_list = [
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.VALID_POINT_MASK].flatten(),
+          labels.flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())

@@ -216,8 +219,10 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features in eval_data:
-      data_list = [features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-                   features[rconst.DUPLICATE_MASK]]
+      data_list = [
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.DUPLICATE_MASK].flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())

@@ -276,8 +281,11 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features, labels in results:
-      data_list = [features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-                   features[rconst.VALID_POINT_MASK], labels]
+      data_list = [
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.VALID_POINT_MASK].flatten(),
+          labels.flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())
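Because the producer now emits [batch_size, 1] arrays, the test flattens each feature before hashing so the md5 digests stay comparable with the previous [batch_size] layout. A short sketch of that equivalence:

    import hashlib
    import numpy as np

    flat = np.arange(8, dtype=np.int64)    # old layout, shape (8,)
    column = flat.reshape(8, 1)            # new layout, shape (8, 1)
    assert (hashlib.md5(flat.tobytes()).hexdigest()
            == hashlib.md5(column.flatten().tobytes()).hexdigest())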
official/recommendation/ncf_common.py (view file @ 7c732da7)

@@ -37,7 +37,6 @@ from official.utils.flags import core as flags_core
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils

 FLAGS = flags.FLAGS

@@ -60,13 +59,8 @@ def get_inputs(params):
       dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, params=params,
       constructor_type=FLAGS.constructor_type,
       deterministic=FLAGS.seed is not None)

-  num_train_steps = (producer.train_batches_per_epoch //
-                     params["batches_per_step"])
-  num_eval_steps = (producer.eval_batches_per_epoch //
-                    params["batches_per_step"])
-  assert not producer.train_batches_per_epoch % params["batches_per_step"]
-  assert not producer.eval_batches_per_epoch % params["batches_per_step"]
+  num_train_steps = producer.train_batches_per_epoch
+  num_eval_steps = producer.eval_batches_per_epoch

   return num_users, num_items, num_train_steps, num_eval_steps, producer

@@ -74,18 +68,13 @@ def get_inputs(params):
 def parse_flags(flags_obj):
   """Convenience function to turn flags into params."""
   num_gpus = flags_core.get_num_gpus(flags_obj)
-  num_devices = FLAGS.num_tpu_shards if FLAGS.tpu else num_gpus or 1
-  batch_size = (flags_obj.batch_size + num_devices - 1) // num_devices
+  batch_size = flags_obj.batch_size

-  eval_divisor = (rconst.NUM_EVAL_NEGATIVES + 1) * num_devices
   eval_batch_size = flags_obj.eval_batch_size or flags_obj.batch_size
-  eval_batch_size = ((eval_batch_size + eval_divisor - 1) //
-                     eval_divisor * eval_divisor // num_devices)

   return {
       "train_epochs": flags_obj.train_epochs,
-      "batches_per_step": num_devices,
+      "batches_per_step": 1,
       "use_seed": flags_obj.seed is not None,
       "batch_size": batch_size,
       "eval_batch_size": eval_batch_size,

@@ -95,6 +84,7 @@ def parse_flags(flags_obj):
       "mf_regularization": flags_obj.mf_regularization,
       "mlp_reg_layers": [float(reg) for reg in flags_obj.mlp_regularization],
       "num_neg": flags_obj.num_neg,
+      "distribution_strategy": flags_obj.distribution_strategy,
       "num_gpus": num_gpus,
       "use_tpu": flags_obj.tpu is not None,
       "tpu": flags_obj.tpu,

@@ -115,7 +105,7 @@ def parse_flags(flags_obj):
   }


-def get_distribution_strategy(params):
+def get_v1_distribution_strategy(params):
   """Returns the distribution strategy to use."""
   if params["use_tpu"]:
     # Some of the networking libraries are quite chatty.
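With "batches_per_step" pinned to 1 in parse_flags, the step counts returned by get_inputs become simply the producer's batches per epoch. A worked sketch of the old versus new arithmetic (the numbers here are made up for illustration):

    train_batches_per_epoch = 1000
    batches_per_step_old = 4   # e.g. one batch per device on 4 GPUs in the old scheme
    old_steps = train_batches_per_epoch // batches_per_step_old   # 250
    new_steps = train_batches_per_epoch                           # 1000: one global batch per step
    print(old_steps, new_steps)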
official/recommendation/ncf_estimator_main.py (view file @ 7c732da7)

@@ -66,7 +66,7 @@ def construct_estimator(model_dir, params):
   Returns:
     An Estimator or TPUEstimator.
   """
-  distribution = ncf_common.get_distribution_strategy(params)
+  distribution = ncf_common.get_v1_distribution_strategy(params)
   run_config = tf.estimator.RunConfig(train_distribute=distribution,
                                       eval_distribute=distribution)
official/recommendation/ncf_input_pipeline.py (view file @ 7c732da7)

@@ -82,7 +82,6 @@ def create_dataset_from_data_producer(producer, params):
     Returns:
       Processed training features.
     """
-    labels = tf.expand_dims(labels, -1)
     fake_dup_mask = tf.zeros_like(features[movielens.USER_COLUMN])
     features[rconst.DUPLICATE_MASK] = fake_dup_mask
     features[rconst.TRAIN_LABEL_KEY] = labels

@@ -106,7 +105,6 @@ def create_dataset_from_data_producer(producer, params):
       Processed evaluation features.
     """
     labels = tf.cast(tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
-    labels = tf.expand_dims(labels, -1)
     fake_valid_pt_mask = tf.cast(
         tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
     features[rconst.VALID_POINT_MASK] = fake_valid_pt_mask

@@ -134,9 +132,13 @@ def create_ncf_input_data(params, producer=None, input_meta_data=None):
   Returns:
     (training dataset, evaluation dataset, train steps per epoch,
     eval steps per epoch)
+
+  Raises:
+    ValueError: If data is being generated online for when using TPU's.
   """
   if params["train_dataset_path"]:
     assert params["eval_dataset_path"]

     train_dataset = create_dataset_from_tf_record_files(
         params["train_dataset_path"],
         input_meta_data["train_prebatch_size"],

@@ -148,34 +150,18 @@ def create_ncf_input_data(params, producer=None, input_meta_data=None):
         params["eval_batch_size"],
         is_training=False)
-    # TODO(b/259377621): Remove number of devices (i.e.
-    # params["batches_per_step"]) in input pipeline logic and only use
-    # global batch size instead.
-    num_train_steps = int(
-        np.ceil(input_meta_data["num_train_steps"] /
-                params["batches_per_step"]))
-    num_eval_steps = (input_meta_data["num_eval_steps"] //
-                      params["batches_per_step"])
+    num_train_steps = int(input_meta_data["num_train_steps"])
+    num_eval_steps = int(input_meta_data["num_eval_steps"])
   else:
-    assert producer
+    if params["use_tpu"]:
+      raise ValueError("TPU training does not support data producer yet. "
+                       "Use pre-processed data.")
+
+    assert producer
     # Start retrieving data from producer.
     train_dataset, eval_dataset = create_dataset_from_data_producer(
         producer, params)
-    num_train_steps = (producer.train_batches_per_epoch //
-                       params["batches_per_step"])
-    num_eval_steps = (producer.eval_batches_per_epoch //
-                      params["batches_per_step"])
-    assert not producer.train_batches_per_epoch % params["batches_per_step"]
-    assert not producer.eval_batches_per_epoch % params["batches_per_step"]
-    # It is required that for distributed training, the dataset must call
-    # batch(). The parameter of batch() here is the number of replicas involed,
-    # such that each replica evenly gets a slice of data.
-    # drop_remainder = True, as we would like batch call to return a fixed shape
-    # vs None, this prevents a expensive broadcast during weighted_loss
-    batches_per_step = params["batches_per_step"]
-    train_dataset = train_dataset.batch(batches_per_step, drop_remainder=True)
-    eval_dataset = eval_dataset.batch(batches_per_step, drop_remainder=True)
+    num_train_steps = producer.train_batches_per_epoch
+    num_eval_steps = producer.eval_batches_per_epoch

   return train_dataset, eval_dataset, num_train_steps, num_eval_steps
official/recommendation/ncf_keras_main.py (view file @ 7c732da7)

This diff is collapsed in the original view (+231 -192) and is not shown here.
official/recommendation/ncf_test.py (view file @ 7c732da7)

@@ -189,7 +189,7 @@ class NcfTest(tf.test.TestCase):
     self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                   2 * math.log(2) / math.log(4)) / 4)

-  _BASE_END_TO_END_FLAGS = ['-batch_size', '1024', '-train_epochs', '1']
+  _BASE_END_TO_END_FLAGS = ['-batch_size', '1044', '-train_epochs', '1']

   @unittest.skipIf(keras_utils.is_v2_0(), "TODO(b/136018594)")
   @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
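The expected value in the assertion above is consistent with the mean NDCG over four users where the positive item is ranked 1st for one user, 2nd for one user and 3rd for two users, each hit contributing log(2)/log(rank + 1) with 1-based ranks. A short check of that arithmetic:

    import math

    def hit_gain(rank):    # 1-based rank of the positive item
        return math.log(2) / math.log(rank + 1)

    expected = (hit_gain(1) + hit_gain(2) + 2 * hit_gain(3)) / 4
    assert abs(expected - (1 + math.log(2) / math.log(3)
                           + 2 * math.log(2) / math.log(4)) / 4) < 1e-12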
official/recommendation/neumf_model.py (view file @ 7c732da7)

@@ -109,7 +109,6 @@ def neumf_model_fn(features, labels, mode, params):
   mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_EPSILON,
                           value=params["epsilon"])
   optimizer = tf.compat.v1.train.AdamOptimizer(
       learning_rate=params["learning_rate"], beta1=params["beta1"],

@@ -151,7 +150,7 @@ def _strip_first_and_last_dimension(x, batch_size):
   return tf.reshape(x[0, :], (batch_size,))


-def construct_model(user_input, item_input, params, need_strip=False):
+def construct_model(user_input, item_input, params):
   # type: (tf.Tensor, tf.Tensor, dict) -> tf.keras.Model
   """Initialize NeuMF model.

@@ -184,34 +183,33 @@ def construct_model(user_input, item_input, params, need_strip=False):
   # Initializer for embedding layers
   embedding_initializer = "glorot_uniform"

-  if need_strip:
-    batch_size = params["batch_size"]
-    user_input_reshaped = tf.keras.layers.Lambda(
-        lambda x: _strip_first_and_last_dimension(x, batch_size))(user_input)
-    item_input_reshaped = tf.keras.layers.Lambda(
-        lambda x: _strip_first_and_last_dimension(x, batch_size))(item_input)
+  def mf_slice_fn(x):
+    x = tf.squeeze(x, [1])
+    return x[:, :mf_dim]
+
+  def mlp_slice_fn(x):
+    x = tf.squeeze(x, [1])
+    return x[:, mf_dim:]

   # It turns out to be significantly more effecient to store the MF and MLP
   # embedding portions in the same table, and then slice as needed.
-  mf_slice_fn = lambda x: x[:, :mf_dim]
-  mlp_slice_fn = lambda x: x[:, mf_dim:]
   embedding_user = tf.keras.layers.Embedding(
-      num_users, mf_dim + model_layers[0] // 2,
+      num_users,
+      mf_dim + model_layers[0] // 2,
       embeddings_initializer=embedding_initializer,
       embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
-      input_length=1, name="embedding_user")(
-          user_input_reshaped if need_strip else user_input)
+      input_length=1,
+      name="embedding_user")(user_input)

   embedding_item = tf.keras.layers.Embedding(
-      num_items, mf_dim + model_layers[0] // 2,
+      num_items,
+      mf_dim + model_layers[0] // 2,
       embeddings_initializer=embedding_initializer,
       embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
-      input_length=1, name="embedding_item")(
-          item_input_reshaped if need_strip else item_input)
+      input_length=1,
+      name="embedding_item")(item_input)

   # GMF part
   mf_user_latent = tf.keras.layers.Lambda(
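The comment in the last hunk explains that a single embedding table holds both the MF and MLP portions, which the new mf_slice_fn / mlp_slice_fn split apart after squeezing out the length-1 input dimension. A minimal sketch of that slicing (toy sizes, not the model's real dimensions):

    import tensorflow as tf

    mf_dim = 2
    mlp_dim = 3
    # Embedding output for a batch of 4 users with input_length=1:
    # shape [batch, 1, mf_dim + mlp_dim].
    embedded = tf.zeros([4, 1, mf_dim + mlp_dim])

    def mf_slice_fn(x):
      x = tf.squeeze(x, [1])
      return x[:, :mf_dim]

    def mlp_slice_fn(x):
      x = tf.squeeze(x, [1])
      return x[:, mf_dim:]

    print(mf_slice_fn(embedded).shape)    # (4, 2)
    print(mlp_slice_fn(embedded).shape)   # (4, 3)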
official/utils/misc/distribution_utils.py (view file @ 7c732da7)

@@ -24,6 +24,8 @@ import random
 import string

 import tensorflow as tf

+from official.utils.misc import tpu_lib
+

 def _collective_communication(all_reduce_alg):
   """Return a CollectiveCommunication based on all_reduce_alg.

@@ -83,16 +85,18 @@ def get_distribution_strategy(distribution_strategy="default",
                               num_gpus=0,
                               num_workers=1,
                               all_reduce_alg=None,
-                              num_packs=1):
+                              num_packs=1,
+                              tpu_address=None):
   """Return a DistributionStrategy for running the model.

   Args:
     distribution_strategy: a string specifying which distribution strategy to
       use. Accepted values are 'off', 'default', 'one_device', 'mirrored',
-      'parameter_server', 'multi_worker_mirrored', case insensitive. 'off' means
-      not to use Distribution Strategy; 'default' means to choose from
+      'parameter_server', 'multi_worker_mirrored', and 'tpu' -- case
+      insensitive. 'off' means not to use Distribution Strategy; 'default'
+      means to choose from
       `MirroredStrategy`, `MultiWorkerMirroredStrategy`, or `OneDeviceStrategy`
-      according to the number of GPUs and number of workers.
+      according to the number of GPUs and number of workers. 'tpu' means to use
+      TPUStrategy using `tpu_address`.
     num_gpus: Number of GPUs to run this model.
     num_workers: Number of workers to run this model.
     all_reduce_alg: Optional. Specifies which algorithm to use when performing

@@ -102,12 +106,14 @@ def get_distribution_strategy(distribution_strategy="default",
       device topology.
     num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
       or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
+    tpu_address: Optional. String that represents TPU to connect to. Must not
+      be None if `distribution_strategy` is set to `tpu`.

   Returns:
     tf.distribute.DistibutionStrategy object.

   Raises:
     ValueError: if `distribution_strategy` is 'off' or 'one_device' and
-      `num_gpus` is larger than 1; or `num_gpus` is negative.
+      `num_gpus` is larger than 1; or `num_gpus` is negative or if
+      `distribution_strategy` is `tpu` but `tpu_address` is not specified.
   """
   if num_gpus < 0:
     raise ValueError("`num_gpus` can not be negative.")

@@ -120,6 +126,15 @@ def get_distribution_strategy(distribution_strategy="default",
           "flag cannot be set to 'off'.".format(num_gpus, num_workers))
     return None

+  if distribution_strategy == "tpu":
+    if not tpu_address:
+      raise ValueError("`tpu_address` must be specified when using "
+                       "TPUStrategy.")
+    # Initialize TPU System.
+    cluster_resolver = tpu_lib.tpu_initialize(tpu_address)
+    return tf.distribute.experimental.TPUStrategy(cluster_resolver)
+
   if distribution_strategy == "multi_worker_mirrored":
     return tf.distribute.experimental.MultiWorkerMirroredStrategy(
         communication=_collective_communication(all_reduce_alg))
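With this change the shared helper can also return a TPUStrategy. A hedged usage sketch (the TPU address below is a placeholder; the keyword arguments are the ones added in this diff):

    from official.utils.misc import distribution_utils

    strategy = distribution_utils.get_distribution_strategy(
        distribution_strategy="tpu", tpu_address="grpc://10.0.0.2:8470")
    with strategy.scope():
        pass  # build and compile the Keras model here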
official/bert/tpu_lib.py → official/utils/misc/tpu_lib.py (view file @ 7c732da7)

@@ -31,3 +31,8 @@ def tpu_initialize(tpu_address):
   tf.config.experimental_connect_to_host(cluster_resolver.master())
   tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
   return cluster_resolver
+
+
+def get_primary_cpu_task(use_remote_tpu=False):
+  """Returns remote TPU worker address. No-op for GPU/CPU training."""
+  return "/job:worker" if use_remote_tpu else ""
research/lstm_object_detection/README.md (view file @ 7c732da7)

@@ -32,3 +32,8 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:rLqvkztmWYgJ:scholar.go
 * yinxiao@google.com
 * menglong@google.com
 * yongzhe@google.com
+
+
+## Table of Contents
+
+* <a href='g3doc/exporting_models.md'>Exporting a trained model</a>
research/lstm_object_detection/configs/lstm_ssd_interleaved_mobilenet_v2_imagenet.config (new file, 0 → 100644, view file @ 7c732da7)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# For training on Imagenet Video with LSTM Interleaved Mobilenet V2
[lstm_object_detection.protos.lstm_model] {
  train_unroll_length: 4
  eval_unroll_length: 4
  lstm_state_depth: 320
  depth_multipliers: 1.4
  depth_multipliers: 0.35
  pre_bottleneck: true
  low_res: true
  train_interleave_method: 'RANDOM_SKIP_SMALL'
  eval_interleave_method: 'SKIP3'
}

model {
  ssd {
    num_classes: 30  # Num of class for imagenet vid dataset.
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 5
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 320
        width: 320
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 3
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
        use_depthwise: true
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'lstm_ssd_interleaved_mobilenet_v2'
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 4.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 8
  optimizer {
    use_moving_average: false
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.002
          decay_steps: 200000
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  gradient_clipping_by_norm: 10.0
  batch_queue_capacity: 12
  prefetch_queue_capacity: 4
}

train_input_reader: {
  shuffle_buffer_size: 32
  queue_capacity: 12
  prefetch_size: 12
  min_after_dequeue: 4
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: '/data/lstm_detection/tfrecords/test.tfrecord'
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
}

eval_config: {
  metrics_set: "coco_evaluation_all_frames"
  use_moving_averages: true
  min_score_threshold: 0.5
  max_num_boxes_to_visualize: 300
  visualize_groundtruth_boxes: true
  groundtruth_box_visualization_color: "red"
}

eval_input_reader {
  label_map_path: "path/to/label_map"
  shuffle: true
  num_epochs: 1
  num_parallel_batches: 1
  num_readers: 1
  external_input_reader {
    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "path/to/sequence_example/data"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 10
      }
    }
  }
}

eval_input_reader: {
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "path/to/sequence_example/data"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
  shuffle: true
  num_readers: 1
}
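A hedged sketch of how this new config would be consumed by the exporter added later in this commit (the path below is a placeholder):

    from lstm_object_detection.utils import config_util

    pipeline_config = config_util.get_configs_from_pipeline_file(
        'lstm_object_detection/configs/'
        'lstm_ssd_interleaved_mobilenet_v2_imagenet.config')
    lstm_config = pipeline_config['lstm_model']
    print(lstm_config.train_unroll_length)  # 4, per the config above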
research/lstm_object_detection/eval.py (view file @ 7c732da7)

@@ -27,8 +27,6 @@ import functools
 import os
 import tensorflow as tf
 from google.protobuf import text_format
-from google3.pyglib import app
-from google3.pyglib import flags
 from lstm_object_detection import evaluator
 from lstm_object_detection import model_builder
 from lstm_object_detection.inputs import seq_dataset_builder

@@ -107,4 +105,4 @@ def main(unused_argv):
       FLAGS.checkpoint_dir, FLAGS.eval_dir)


 if __name__ == '__main__':
-  app.run()
+  tf.app.run()
research/lstm_object_detection/export_tflite_lstd_graph.py (new file, 0 → 100644, view file @ 7c732da7)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Exports an LSTM detection model to use with tf-lite.

Outputs file:
* A tflite compatible frozen graph - $output_directory/tflite_graph.pb
The exported graph has the following input and output nodes.
Inputs:
'input_video_tensor': a float32 tensor of shape
[unroll_length, height, width, 3] containing the normalized input image.
Note that the height and width must be compatible with the height and
width configured in the fixed_shape_image resizer options in the pipeline
config proto.
Outputs:
If add_postprocessing_op is true: frozen graph adds a
TFLite_Detection_PostProcess custom op node has four outputs:
detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
locations
detection_classes: a float32 tensor of shape [1, num_boxes]
with class indices
detection_scores: a float32 tensor of shape [1, num_boxes]
with class scores
num_boxes: a float32 tensor of size 1 containing the number of detected boxes
else:
the graph has three outputs:
'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
containing the encoded box predictions.
'raw_outputs/class_predictions': a float32 tensor of shape
[1, num_anchors, num_classes] containing the class scores for each anchor
after applying score conversion.
'anchors': a float32 constant tensor of shape [num_anchors, 4]
containing the anchor boxes.
Example Usage:
--------------
python lstm_object_detection/export_tflite_lstd_graph.py \
--pipeline_config_path path/to/lstm_pipeline.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
The expected output would be in the directory
path/to/exported_model_directory (which is created if it does not exist)
with contents:
- tflite_graph.pbtxt
- tflite_graph.pb
Config overrides (see the `config_override` flag) are text protobufs
(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
certain fields in the provided pipeline_config_path. These are useful for
making small changes to the inference graph that differ from the training or
eval config.
Example Usage (in which we change the NMS iou_threshold to be 0.5 and
NMS score_threshold to be 0.0):
python lstm_object_detection/export_tflite_lstd_graph.py \
--pipeline_config_path path/to/lstm_pipeline.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
--config_override " \
model{ \
ssd{ \
post_processing { \
batch_non_max_suppression { \
score_threshold: 0.0 \
iou_threshold: 0.5 \
} \
} \
} \
} \
"
"""
import tensorflow as tf

from lstm_object_detection.utils import config_util
from lstm_object_detection import export_tflite_lstd_graph_lib

flags = tf.app.flags
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
flags.DEFINE_string('pipeline_config_path', None,
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file.')
flags.DEFINE_string('trained_checkpoint_prefix', None, 'Checkpoint prefix.')
flags.DEFINE_integer('max_detections', 10,
                     'Maximum number of detections (boxes) to show.')
flags.DEFINE_integer('max_classes_per_detection', 1,
                     'Maximum number of classes to output per detection box.')
flags.DEFINE_integer(
    'detections_per_class', 100,
    'Number of anchors used per class in Regular Non-Max-Suppression.')
flags.DEFINE_bool('add_postprocessing_op', True,
                  'Add TFLite custom op for postprocessing to the graph.')
flags.DEFINE_bool(
    'use_regular_nms', False,
    'Flag to set postprocessing op to use Regular NMS instead of Fast NMS.')
flags.DEFINE_string(
    'config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig '
    'text proto to override pipeline_config_path.')
FLAGS = flags.FLAGS


def main(argv):
  del argv  # Unused.
  flags.mark_flag_as_required('output_directory')
  flags.mark_flag_as_required('pipeline_config_path')
  flags.mark_flag_as_required('trained_checkpoint_prefix')

  pipeline_config = config_util.get_configs_from_pipeline_file(
      FLAGS.pipeline_config_path)

  export_tflite_lstd_graph_lib.export_tflite_graph(
      pipeline_config,
      FLAGS.trained_checkpoint_prefix,
      FLAGS.output_directory,
      FLAGS.add_postprocessing_op,
      FLAGS.max_detections,
      FLAGS.max_classes_per_detection,
      use_regular_nms=FLAGS.use_regular_nms)


if __name__ == '__main__':
  tf.app.run(main)
research/lstm_object_detection/export_tflite_lstd_graph_lib.py (new file, 0 → 100644, view file @ 7c732da7)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Exports detection models to use with tf-lite.

See export_tflite_lstd_graph.py for usage.
"""
import os
import tempfile
import numpy as np
import tensorflow as tf

from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import types_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import post_processing_builder
from object_detection.core import box_list
from lstm_object_detection import model_builder

_DEFAULT_NUM_CHANNELS = 3
_DEFAULT_NUM_COORD_BOX = 4
def get_const_center_size_encoded_anchors(anchors):
  """Exports center-size encoded anchors as a constant tensor.

  Args:
    anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor
      boxes

  Returns:
    encoded_anchors: a float32 constant tensor of shape [num_anchors, 4]
      containing the anchor boxes.
  """
  anchor_boxlist = box_list.BoxList(anchors)
  y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes()
  num_anchors = y.get_shape().as_list()

  with tf.Session() as sess:
    y_out, x_out, h_out, w_out = sess.run([y, x, h, w])
  encoded_anchors = tf.constant(
      np.transpose(np.stack((y_out, x_out, h_out, w_out))),
      dtype=tf.float32,
      shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX],
      name='anchors')
  return encoded_anchors
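As an aside (this snippet is not part of the new file), the helper above bakes the anchors into the graph in center-size form ([ycenter, xcenter, height, width]). The corner-to-center conversion it relies on is easy to see in NumPy; the real code uses BoxList rather than this sketch:

    import numpy as np

    corner_anchors = np.array([[0.0, 0.0, 0.5, 1.0]])  # [ymin, xmin, ymax, xmax]
    ymin, xmin, ymax, xmax = corner_anchors.T
    center_size = np.stack(
        [(ymin + ymax) / 2, (xmin + xmax) / 2, ymax - ymin, xmax - xmin], axis=1)
    print(center_size)  # [[0.25, 0.5, 0.5, 1.0]]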
def append_postprocessing_op(frozen_graph_def,
                             max_detections,
                             max_classes_per_detection,
                             nms_score_threshold,
                             nms_iou_threshold,
                             num_classes,
                             scale_values,
                             detections_per_class=100,
                             use_regular_nms=False):
  """Appends postprocessing custom op.

  Args:
    frozen_graph_def: Frozen GraphDef for SSD model after freezing the
      checkpoint
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection
    nms_score_threshold: Score threshold used in Non-maximal suppression in
      post-processing
    nms_iou_threshold: Intersection-over-union threshold used in Non-maximal
      suppression in post-processing
    num_classes: number of classes in SSD detector
    scale_values: scale values is a dict with following key-value pairs
      {y_scale: 10, x_scale: 10, h_scale: 5, w_scale: 5} that are used in decode
      centersize boxes
    detections_per_class: In regular NonMaxSuppression, number of anchors used
      for NonMaxSuppression per class
    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
      of Fast NMS.

  Returns:
    transformed_graph_def: Frozen GraphDef with postprocessing custom op
      appended
    TFLite_Detection_PostProcess custom op node has four outputs:
    detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
      locations
    detection_classes: a float32 tensor of shape [1, num_boxes]
      with class indices
    detection_scores: a float32 tensor of shape [1, num_boxes]
      with class scores
    num_boxes: a float32 tensor of size 1 containing the number of detected
      boxes
  """
  new_output = frozen_graph_def.node.add()
  new_output.op = 'TFLite_Detection_PostProcess'
  new_output.name = 'TFLite_Detection_PostProcess'
  new_output.attr['_output_quantized'].CopyFrom(
      attr_value_pb2.AttrValue(b=True))
  new_output.attr['_output_types'].list.type.extend([
      types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, types_pb2.DT_FLOAT,
      types_pb2.DT_FLOAT
  ])
  new_output.attr['_support_output_type_float_in_quantized_op'].CopyFrom(
      attr_value_pb2.AttrValue(b=True))
  new_output.attr['max_detections'].CopyFrom(
      attr_value_pb2.AttrValue(i=max_detections))
  new_output.attr['max_classes_per_detection'].CopyFrom(
      attr_value_pb2.AttrValue(i=max_classes_per_detection))
  new_output.attr['nms_score_threshold'].CopyFrom(
      attr_value_pb2.AttrValue(f=nms_score_threshold.pop()))
  new_output.attr['nms_iou_threshold'].CopyFrom(
      attr_value_pb2.AttrValue(f=nms_iou_threshold.pop()))
  new_output.attr['num_classes'].CopyFrom(
      attr_value_pb2.AttrValue(i=num_classes))
  new_output.attr['y_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['y_scale'].pop()))
  new_output.attr['x_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['x_scale'].pop()))
  new_output.attr['h_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['h_scale'].pop()))
  new_output.attr['w_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['w_scale'].pop()))
  new_output.attr['detections_per_class'].CopyFrom(
      attr_value_pb2.AttrValue(i=detections_per_class))
  new_output.attr['use_regular_nms'].CopyFrom(
      attr_value_pb2.AttrValue(b=use_regular_nms))
  new_output.input.extend(
      ['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors'])

  # Transform the graph to append new postprocessing op
  input_names = []
  output_names = ['TFLite_Detection_PostProcess']
  transforms = ['strip_unused_nodes']
  transformed_graph_def = TransformGraph(frozen_graph_def, input_names,
                                         output_names, transforms)
  return transformed_graph_def
def export_tflite_graph(pipeline_config,
                        trained_checkpoint_prefix,
                        output_dir,
                        add_postprocessing_op,
                        max_detections,
                        max_classes_per_detection,
                        detections_per_class=100,
                        use_regular_nms=False,
                        binary_graph_name='tflite_graph.pb',
                        txt_graph_name='tflite_graph.pbtxt'):
  """Exports a tflite compatible graph and anchors for ssd detection model.

  Anchors are written to a tensor and tflite compatible graph
  is written to output_dir/tflite_graph.pb.

  Args:
    pipeline_config: Dictionary of configuration objects. Keys are `model`,
      `train_config`, `train_input_config`, `eval_config`, `eval_input_config`,
      `lstm_model`. Value are the corresponding config objects.
    trained_checkpoint_prefix: a file prefix for the checkpoint containing the
      trained parameters of the SSD model.
    output_dir: A directory to write the tflite graph and anchor file to.
    add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds a
      TFLite_Detection_PostProcess custom op
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection
    detections_per_class: In regular NonMaxSuppression, number of anchors used
      for NonMaxSuppression per class
    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
      of Fast NMS.
    binary_graph_name: Name of the exported graph file in binary format.
    txt_graph_name: Name of the exported graph file in text format.

  Raises:
    ValueError: if the pipeline config contains models other than ssd or uses an
      fixed_shape_resizer and provides a shape as well.
  """
  model_config = pipeline_config['model']
  lstm_config = pipeline_config['lstm_model']
  eval_config = pipeline_config['eval_config']
  tf.gfile.MakeDirs(output_dir)
  if model_config.WhichOneof('model') != 'ssd':
    raise ValueError('Only ssd models are supported in tflite. '
                     'Found {} in config'.format(model_config.WhichOneof('model')))

  num_classes = model_config.ssd.num_classes
  nms_score_threshold = {
      model_config.ssd.post_processing.batch_non_max_suppression.score_threshold
  }
  nms_iou_threshold = {
      model_config.ssd.post_processing.batch_non_max_suppression.iou_threshold
  }
  scale_values = {}
  scale_values['y_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.y_scale
  }
  scale_values['x_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.x_scale
  }
  scale_values['h_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.height_scale
  }
  scale_values['w_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.width_scale
  }

  image_resizer_config = model_config.ssd.image_resizer
  image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
  num_channels = _DEFAULT_NUM_CHANNELS
  if image_resizer == 'fixed_shape_resizer':
    height = image_resizer_config.fixed_shape_resizer.height
    width = image_resizer_config.fixed_shape_resizer.width
    if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
      num_channels = 1
    # TODO(richardbrks) figure out how to make with a None defined batch size
    shape = [lstm_config.eval_unroll_length, height, width, num_channels]
  else:
    raise ValueError(
        'Only fixed_shape_resizer'
        'is supported with tflite. Found {}'.format(
            image_resizer_config.WhichOneof('image_resizer_oneof')))

  video_tensor = tf.placeholder(
      tf.float32, shape=shape, name='input_video_tensor')

  detection_model = model_builder.build(
      model_config, lstm_config, is_training=False)
  preprocessed_video, true_image_shapes = detection_model.preprocess(
      tf.to_float(video_tensor))
  predicted_tensors = detection_model.predict(preprocessed_video,
                                              true_image_shapes)
  # predicted_tensors = detection_model.postprocess(predicted_tensors,
  #                                                 true_image_shapes)
  # The score conversion occurs before the post-processing custom op
  _, score_conversion_fn = post_processing_builder.build(
      model_config.ssd.post_processing)
  class_predictions = score_conversion_fn(
      predicted_tensors['class_predictions_with_background'])

  with tf.name_scope('raw_outputs'):
    # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
    #  containing the encoded box predictions. Note that these are raw
    #  predictions and no Non-Max suppression is applied on them and
    #  no decode center size boxes is applied to them.
    tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
    # 'raw_outputs/class_predictions': a float32 tensor of shape
    #  [1, num_anchors, num_classes] containing the class scores for each anchor
    #  after applying score conversion.
    tf.identity(class_predictions, name='class_predictions')
  # 'anchors': a float32 tensor of shape
  #   [4, num_anchors] containing the anchors as a constant node.
  tf.identity(
      get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
      name='anchors')

  # Add global step to the graph, so we know the training step number when we
  # evaluate the model.
  tf.train.get_or_create_global_step()

  # graph rewriter
  is_quantized = ('graph_rewriter' in pipeline_config)
  if is_quantized:
    graph_rewriter_config = pipeline_config['graph_rewriter']
    graph_rewriter_fn = graph_rewriter_builder.build(
        graph_rewriter_config, is_training=False, is_export=True)
    graph_rewriter_fn()

  if model_config.ssd.feature_extractor.HasField('fpn'):
    exporter.rewrite_nn_resize_op(is_quantized)

  # freeze the graph
  saver_kwargs = {}
  if eval_config.use_moving_averages:
    saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
    moving_average_checkpoint = tempfile.NamedTemporaryFile()
    exporter.replace_variable_values_with_moving_averages(
        tf.get_default_graph(), trained_checkpoint_prefix,
        moving_average_checkpoint.name)
    checkpoint_to_use = moving_average_checkpoint.name
  else:
    checkpoint_to_use = trained_checkpoint_prefix

  saver = tf.train.Saver(**saver_kwargs)
  input_saver_def = saver.as_saver_def()
  frozen_graph_def = exporter.freeze_graph_with_def_protos(
      input_graph_def=tf.get_default_graph().as_graph_def(),
      input_saver_def=input_saver_def,
      input_checkpoint=checkpoint_to_use,
      output_node_names=','.join([
          'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
          'anchors'
      ]),
      restore_op_name='save/restore_all',
      filename_tensor_name='save/Const:0',
      clear_devices=True,
      output_graph='',
      initializer_nodes='')

  # Add new operation to do post processing in a custom op (TF Lite only)
  # (richardbrks) Do use this or detection_model.postprocess?
  if add_postprocessing_op:
    transformed_graph_def = append_postprocessing_op(
        frozen_graph_def, max_detections, max_classes_per_detection,
        nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
        detections_per_class, use_regular_nms)
  else:
    # Return frozen without adding post-processing custom op
    transformed_graph_def = frozen_graph_def
  binary_graph = os.path.join(output_dir, binary_graph_name)
  with tf.gfile.GFile(binary_graph, 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())
  txt_graph = os.path.join(output_dir, txt_graph_name)
  with tf.gfile.GFile(txt_graph, 'w') as f:
    f.write(str(transformed_graph_def))
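Once tflite_graph.pb has been written, it can be turned into a .tflite flatbuffer with the TF 1.x converter. A hedged sketch (the paths and the unroll length of 4 are assumptions based on the config and docstrings above; the post-processing custom op requires allow_custom_ops):

    import tensorflow as tf

    converter = tf.lite.TFLiteConverter.from_frozen_graph(
        graph_def_file='exported/tflite_graph.pb',        # assumed output path
        input_arrays=['input_video_tensor'],
        output_arrays=['TFLite_Detection_PostProcess'],
        input_shapes={'input_video_tensor': [4, 320, 320, 3]})
    converter.allow_custom_ops = True  # TFLite_Detection_PostProcess is a custom op
    with open('exported/model.tflite', 'wb') as f:
        f.write(converter.convert())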