ModelZoo / ResNet50_tensorflow
Commit 36ef0b7a, authored Aug 12, 2019 by Nimit Nigania

merge from master

Parents: 0b8b571a, 7c732da7
Changes: 25 files in total. Showing 20 changed files with 1127 additions and 339 deletions (+1127 −339); the remaining files are on the second page of the diff view.
official/bert/benchmark/bert_squad_benchmark.py  +20 −7
official/bert/modeling.py  +15 −0
official/bert/run_classifier.py  +1 −1
official/bert/run_pretraining.py  +1 −1
official/bert/run_squad.py  +1 −1
official/recommendation/data_pipeline.py  +64 −50
official/recommendation/data_test.py  +14 −6
official/recommendation/ncf_common.py  +6 −16
official/recommendation/ncf_estimator_main.py  +1 −1
official/recommendation/ncf_input_pipeline.py  +14 −28
official/recommendation/ncf_keras_main.py  +238 −199
official/recommendation/ncf_test.py  +1 −1
official/recommendation/neumf_model.py  +17 −19
official/utils/misc/distribution_utils.py  +21 −6
official/utils/misc/tpu_lib.py  +5 −0
research/lstm_object_detection/README.md  +5 −0
research/lstm_object_detection/configs/lstm_ssd_interleaved_mobilenet_v2_imagenet.config  +239 −0
research/lstm_object_detection/eval.py  +1 −3
research/lstm_object_detection/export_tflite_lstd_graph.py  +134 −0
research/lstm_object_detection/export_tflite_lstd_graph_lib.py  +329 −0
official/bert/benchmark/bert_squad_benchmark.py

@@ -152,7 +152,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark()

@@ -174,7 +174,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark(use_ds=False)

@@ -185,7 +185,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_eager_no_dist_strat_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark(use_ds=False, run_eagerly=True)

@@ -195,7 +195,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 2
     FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
-    FLAGS.train_batch_size = 8
+    FLAGS.train_batch_size = 6
     self._run_and_report_benchmark()

@@ -205,7 +205,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 4
     FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
-    FLAGS.train_batch_size = 16
+    FLAGS.train_batch_size = 12
     self._run_and_report_benchmark()

@@ -215,7 +215,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
-    FLAGS.train_batch_size = 32
+    FLAGS.train_batch_size = 24
     self._run_and_report_benchmark()

@@ -231,6 +231,19 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_xla_fp16(self):
+    """Tests BERT SQuAD model performance with 1 GPU with XLA and FP16."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad_fp16')
+    FLAGS.train_batch_size = 4
+    FLAGS.enable_xla = True
+    FLAGS.dtype = 'fp16'
+    FLAGS.loss_scale = 'dynamic'
+    self._run_and_report_benchmark()
+
   def benchmark_2_gpu_fp16(self):
     """Tests BERT SQuAD model performance with 2 GPUs and FP16."""

@@ -324,7 +337,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
-    FLAGS.train_batch_size = 32
+    FLAGS.train_batch_size = 24
     self._run_and_report_benchmark()
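Across these hunks the per-GPU train batch size drops from 4 to 3, so the totals scale linearly with GPU count (4→3, 8→6, 16→12, 32→24). A quick illustrative check in Python (not part of the commit):

for num_gpus in (1, 2, 4, 8):
    old_total = 4 * num_gpus  # previous per-GPU batch of 4: 4, 8, 16, 32
    new_total = 3 * num_gpus  # new per-GPU batch of 3: 3, 6, 12, 24
    print(num_gpus, old_total, new_total)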
official/bert/modeling.py

@@ -276,6 +276,7 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
                max_position_embeddings=512,
                dropout_prob=0.0,
                initializer_range=0.02,
+               initializer=None,
                **kwargs):
     super(EmbeddingPostprocessor, self).__init__(**kwargs)
     self.use_type_embeddings = use_type_embeddings

@@ -285,6 +286,11 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
     self.dropout_prob = dropout_prob
     self.initializer_range = initializer_range
+    if not initializer:
+      self.initializer = get_initializer(self.initializer_range)
+    else:
+      self.initializer = initializer
     if self.use_type_embeddings and not self.token_type_vocab_size:
       raise ValueError("If `use_type_embeddings` is True, then "
                        "`token_type_vocab_size` must be specified.")

@@ -723,6 +729,15 @@ class TransformerBlock(tf.keras.layers.Layer):
         name="output_layer_norm", axis=-1, epsilon=1e-12)
     super(TransformerBlock, self).build(unused_input_shapes)

+  def common_layers(self):
+    """Explicitly gets all layer objects inside a Transformer encoder block."""
+    return [
+        self.attention_layer, self.attention_output_dense,
+        self.attention_dropout, self.attention_layer_norm,
+        self.intermediate_dense, self.output_dense, self.output_dropout,
+        self.output_layer_norm
+    ]
+
   def __call__(self, input_tensor, attention_mask=None):
     inputs = pack_inputs([input_tensor, attention_mask])
     return super(TransformerBlock, self).__call__(inputs)
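The new common_layers method simply enumerates the block's sub-layers so callers can inspect or reuse them. A minimal self-contained analog of the pattern (TinyBlock is a hypothetical stand-in, not the TransformerBlock above):

import tensorflow as tf

class TinyBlock(tf.keras.layers.Layer):
  """Hypothetical composite layer mirroring the common_layers() pattern."""

  def build(self, input_shape):
    self.dense_in = tf.keras.layers.Dense(8)
    self.dense_out = tf.keras.layers.Dense(4)
    super(TinyBlock, self).build(input_shape)

  def common_layers(self):
    # Explicitly lists sub-layers, as TransformerBlock.common_layers() does.
    return [self.dense_in, self.dense_out]

  def call(self, inputs):
    return self.dense_out(self.dense_in(inputs))

block = TinyBlock()
block(tf.zeros([2, 16]))  # builds the layer
print([layer.name for layer in block.common_layers()])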
official/bert/run_classifier.py

@@ -35,8 +35,8 @@ from official.bert import model_saving_utils
 from official.bert import model_training_utils
 from official.bert import modeling
 from official.bert import optimization
-from official.bert import tpu_lib
 from official.utils.misc import keras_utils
+from official.utils.misc import tpu_lib

 flags.DEFINE_enum('mode', 'train_and_eval', ['train_and_eval', 'export_only'],
official/bert/run_pretraining.py

@@ -33,7 +33,7 @@ from official.bert import model_saving_utils
 from official.bert import model_training_utils
 from official.bert import modeling
 from official.bert import optimization
-from official.bert import tpu_lib
+from official.utils.misc import tpu_lib

 flags.DEFINE_string('input_files', None,
                     'File path to retrieve training data for pre-training.')
official/bert/run_squad.py

@@ -36,8 +36,8 @@ from official.bert import modeling
 from official.bert import optimization
 from official.bert import squad_lib
 from official.bert import tokenization
-from official.bert import tpu_lib
 from official.utils.misc import keras_utils
+from official.utils.misc import tpu_lib

 flags.DEFINE_bool('do_train', False, 'Whether to run training.')
 flags.DEFINE_bool('do_predict', False, 'Whether to run eval on the dev set.')
official/recommendation/data_pipeline.py

@@ -143,37 +143,32 @@ class DatasetManager(object):
     if is_training:
       return {
           movielens.USER_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           movielens.ITEM_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           rconst.VALID_POINT_MASK:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           "labels":
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64)
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64)
       }
     else:
       return {
           movielens.USER_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           movielens.ITEM_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           rconst.DUPLICATE_MASK:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64)
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64)
       }

     features = tf.io.parse_single_example(
         serialized_data, _get_feature_map(batch_size, is_training=is_training))
-    users = tf.reshape(tf.cast(
-        features[movielens.USER_COLUMN], rconst.USER_DTYPE), (batch_size,))
-    items = tf.reshape(tf.cast(
-        features[movielens.ITEM_COLUMN], rconst.ITEM_DTYPE), (batch_size,))
+    users = tf.cast(features[movielens.USER_COLUMN], rconst.USER_DTYPE)
+    items = tf.cast(features[movielens.ITEM_COLUMN], rconst.ITEM_DTYPE)

     if is_training:
-      valid_point_mask = tf.reshape(tf.cast(
-          features[movielens.ITEM_COLUMN], tf.bool), (batch_size,))
-      fake_dup_mask = tf.zeros_like(features[movielens.USER_COLUMN])
+      valid_point_mask = tf.cast(features[rconst.VALID_POINT_MASK], tf.bool)
+      fake_dup_mask = tf.zeros_like(users)
       return {
           movielens.USER_COLUMN: users,
           movielens.ITEM_COLUMN: items,
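The net effect of this hunk is a shape change: parsed features gain a trailing unit dimension, [batch_size] becoming [batch_size, 1]. A small sketch of the equivalent reshape (values illustrative, not from the commit):

import tensorflow as tf

flat = tf.constant([1, 2, 3, 4], dtype=tf.int64)  # old layout: shape (4,)
column = tf.expand_dims(flat, -1)                 # new layout: shape (4, 1)
print(column.shape)  # (4, 1)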
@@ -184,20 +179,15 @@ class DatasetManager(object):
           rconst.DUPLICATE_MASK: fake_dup_mask
       }
     else:
-      labels = tf.reshape(tf.cast(tf.zeros_like(
-          features[movielens.USER_COLUMN]), tf.bool), (batch_size, 1))
-      fake_valid_pt_mask = tf.cast(
-          tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
+      labels = tf.cast(tf.zeros_like(users), tf.bool)
+      fake_valid_pt_mask = tf.cast(tf.zeros_like(users), tf.bool)
       return {
           movielens.USER_COLUMN: users,
           movielens.ITEM_COLUMN: items,
           rconst.DUPLICATE_MASK:
-              tf.reshape(tf.cast(
-                  features[rconst.DUPLICATE_MASK], tf.bool), (batch_size,)),
+              tf.cast(features[rconst.DUPLICATE_MASK], tf.bool),
           rconst.VALID_POINT_MASK: fake_valid_pt_mask,
           rconst.TRAIN_LABEL_KEY:
@@ -221,8 +211,8 @@ class DatasetManager(object):
     if self._is_training:
       mask_start_index = data.pop(rconst.MASK_START_INDEX)
       batch_size = data[movielens.ITEM_COLUMN].shape[0]
-      data[rconst.VALID_POINT_MASK] = np.less(np.arange(batch_size),
-                                              mask_start_index)
+      data[rconst.VALID_POINT_MASK] = np.expand_dims(
+          np.less(np.arange(batch_size), mask_start_index), -1)

     if self._stream_files:
       example_bytes = self.serialize(data)
@@ -313,19 +303,21 @@ class DatasetManager(object):
     else:
       types = {movielens.USER_COLUMN: rconst.USER_DTYPE,
                movielens.ITEM_COLUMN: rconst.ITEM_DTYPE}
-      shapes = {movielens.USER_COLUMN: tf.TensorShape([batch_size]),
-                movielens.ITEM_COLUMN: tf.TensorShape([batch_size])}
+      shapes = {
+          movielens.USER_COLUMN: tf.TensorShape([batch_size, 1]),
+          movielens.ITEM_COLUMN: tf.TensorShape([batch_size, 1])
+      }

       if self._is_training:
         types[rconst.VALID_POINT_MASK] = np.bool
-        shapes[rconst.VALID_POINT_MASK] = tf.TensorShape([batch_size])
+        shapes[rconst.VALID_POINT_MASK] = tf.TensorShape([batch_size, 1])

         types = (types, np.bool)
-        shapes = (shapes, tf.TensorShape([batch_size]))
+        shapes = (shapes, tf.TensorShape([batch_size, 1]))
       else:
         types[rconst.DUPLICATE_MASK] = np.bool
-        shapes[rconst.DUPLICATE_MASK] = tf.TensorShape([batch_size])
+        shapes[rconst.DUPLICATE_MASK] = tf.TensorShape([batch_size, 1])

     data_generator = functools.partial(
         self.data_generator, epochs_between_evals=epochs_between_evals)
@@ -554,12 +546,17 @@ class BaseDataConstructor(threading.Thread):
     items = np.concatenate([items, item_pad])
     labels = np.concatenate([labels, label_pad])

-    self._train_dataset.put(i, {
-        movielens.USER_COLUMN: users,
-        movielens.ITEM_COLUMN: items,
-        rconst.MASK_START_INDEX: np.array(mask_start_index, dtype=np.int32),
-        "labels": labels,
-    })
+    self._train_dataset.put(
+        i, {
+            movielens.USER_COLUMN:
+                np.reshape(users, (self.train_batch_size, 1)),
+            movielens.ITEM_COLUMN:
+                np.reshape(items, (self.train_batch_size, 1)),
+            rconst.MASK_START_INDEX:
+                np.array(mask_start_index, dtype=np.int32),
+            "labels":
+                np.reshape(labels, (self.train_batch_size, 1)),
+        })

   def _wait_to_construct_train_epoch(self):
     count = 0
@@ -649,11 +646,15 @@ class BaseDataConstructor(threading.Thread):
     users, items, duplicate_mask = self._assemble_eval_batch(
         users, positive_items, negative_items, self._eval_users_per_batch)

-    self._eval_dataset.put(i, {
-        movielens.USER_COLUMN: users.flatten(),
-        movielens.ITEM_COLUMN: items.flatten(),
-        rconst.DUPLICATE_MASK: duplicate_mask.flatten(),
-    })
+    self._eval_dataset.put(
+        i, {
+            movielens.USER_COLUMN:
+                np.reshape(users.flatten(), (self.eval_batch_size, 1)),
+            movielens.ITEM_COLUMN:
+                np.reshape(items.flatten(), (self.eval_batch_size, 1)),
+            rconst.DUPLICATE_MASK:
+                np.reshape(duplicate_mask.flatten(), (self.eval_batch_size, 1)),
+        })

   def _construct_eval_epoch(self):
     """Loop to construct data for evaluation."""
@@ -720,24 +721,37 @@ class DummyConstructor(threading.Thread):
       num_users = params["num_users"]
       num_items = params["num_items"]

-      users = tf.random.uniform([batch_size], dtype=tf.int32,
-                                minval=0, maxval=num_users)
-      items = tf.random.uniform([batch_size], dtype=tf.int32,
-                                minval=0, maxval=num_items)
+      users = tf.random.uniform([batch_size, 1], dtype=tf.int32,
+                                minval=0, maxval=num_users)
+      items = tf.random.uniform([batch_size, 1], dtype=tf.int32,
+                                minval=0, maxval=num_items)

       if is_training:
-        valid_point_mask = tf.cast(tf.random.uniform(
-            [batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
-        labels = tf.cast(tf.random.uniform(
-            [batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
+        valid_point_mask = tf.cast(
+            tf.random.uniform([batch_size, 1], dtype=tf.int32,
+                              minval=0, maxval=2), tf.bool)
+        labels = tf.cast(
+            tf.random.uniform([batch_size, 1], dtype=tf.int32,
+                              minval=0, maxval=2), tf.bool)
         data = {
             movielens.USER_COLUMN: users,
             movielens.ITEM_COLUMN: items,
             rconst.VALID_POINT_MASK: valid_point_mask,
         }, labels
       else:
-        dupe_mask = tf.cast(tf.random.uniform([batch_size], dtype=tf.int32,
-                                              minval=0, maxval=2), tf.bool)
+        dupe_mask = tf.cast(tf.random.uniform([batch_size, 1], dtype=tf.int32,
+                                              minval=0, maxval=2), tf.bool)
         data = {
             movielens.USER_COLUMN: users,
             movielens.ITEM_COLUMN: items,
official/recommendation/data_test.py

@@ -168,8 +168,11 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features, labels in first_epoch:
-      data_list = [
-          features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-          features[rconst.VALID_POINT_MASK], labels
-      ]
+      data_list = [
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.VALID_POINT_MASK].flatten(),
+          labels.flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())

@@ -216,8 +219,10 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features in eval_data:
-      data_list = [
-          features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-          features[rconst.DUPLICATE_MASK]]
+      data_list = [
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.DUPLICATE_MASK].flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())

@@ -276,8 +281,11 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features, labels in results:
-      data_list = [
-          features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-          features[rconst.VALID_POINT_MASK], labels
-      ]
+      data_list = [
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.VALID_POINT_MASK].flatten(),
+          labels.flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())
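These test changes flatten each array before hashing. A hedged sketch of why that is safe: flatten() returns a contiguous rank-1 copy, so tobytes() is well defined regardless of the array's original layout. The feature values here are hypothetical:

import hashlib
import numpy as np

md5 = hashlib.md5()
feature = np.arange(4, dtype=np.int64).reshape(4, 1)  # hypothetical (batch, 1) feature
md5.update(feature.flatten().tobytes())               # hash a contiguous rank-1 copy
print(md5.hexdigest())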
official/recommendation/ncf_common.py

@@ -37,7 +37,6 @@ from official.utils.flags import core as flags_core
 from official.utils.misc import distribution_utils
-from official.utils.misc import keras_utils

 FLAGS = flags.FLAGS

@@ -60,13 +59,8 @@ def get_inputs(params):
       dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, params=params,
       constructor_type=FLAGS.constructor_type,
       deterministic=FLAGS.seed is not None)

-  num_train_steps = (producer.train_batches_per_epoch //
-                     params["batches_per_step"])
-  num_eval_steps = (producer.eval_batches_per_epoch //
-                    params["batches_per_step"])
-  assert not producer.train_batches_per_epoch % params["batches_per_step"]
-  assert not producer.eval_batches_per_epoch % params["batches_per_step"]
+  num_train_steps = producer.train_batches_per_epoch
+  num_eval_steps = producer.eval_batches_per_epoch

   return num_users, num_items, num_train_steps, num_eval_steps, producer

@@ -74,18 +68,13 @@ def get_inputs(params):
 def parse_flags(flags_obj):
   """Convenience function to turn flags into params."""
   num_gpus = flags_core.get_num_gpus(flags_obj)
   num_devices = FLAGS.num_tpu_shards if FLAGS.tpu else num_gpus or 1

-  batch_size = (flags_obj.batch_size + num_devices - 1) // num_devices
   eval_divisor = (rconst.NUM_EVAL_NEGATIVES + 1) * num_devices
+  batch_size = flags_obj.batch_size
   eval_batch_size = flags_obj.eval_batch_size or flags_obj.batch_size
   eval_batch_size = ((eval_batch_size + eval_divisor - 1) //
                      eval_divisor * eval_divisor // num_devices)

   return {
       "train_epochs": flags_obj.train_epochs,
-      "batches_per_step": num_devices,
+      "batches_per_step": 1,
       "use_seed": flags_obj.seed is not None,
       "batch_size": batch_size,
       "eval_batch_size": eval_batch_size,

@@ -95,6 +84,7 @@ def parse_flags(flags_obj):
       "mf_regularization": flags_obj.mf_regularization,
       "mlp_reg_layers": [float(reg) for reg in flags_obj.mlp_regularization],
       "num_neg": flags_obj.num_neg,
+      "distribution_strategy": flags_obj.distribution_strategy,
       "num_gpus": num_gpus,
       "use_tpu": flags_obj.tpu is not None,
       "tpu": flags_obj.tpu,

@@ -115,7 +105,7 @@ def parse_flags(flags_obj):
   }

-def get_distribution_strategy(params):
+def get_v1_distribution_strategy(params):
   """Returns the distribution strategy to use."""
   if params["use_tpu"]:
     # Some of the networking libraries are quite chatty.
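With "batches_per_step" pinned to 1, the per-epoch step counts become the producer's batch counts taken directly, rather than divided by the device count. Illustrative arithmetic (values hypothetical, not from the commit):

train_batches_per_epoch = 1000  # hypothetical producer value
old_batches_per_step = 4        # previously the number of devices
old_steps = train_batches_per_epoch // old_batches_per_step  # 250
new_steps = train_batches_per_epoch                          # 1000 (batches_per_step == 1)
print(old_steps, new_steps)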
official/recommendation/ncf_estimator_main.py

@@ -66,7 +66,7 @@ def construct_estimator(model_dir, params):
   Returns:
     An Estimator or TPUEstimator.
   """
-  distribution = ncf_common.get_distribution_strategy(params)
+  distribution = ncf_common.get_v1_distribution_strategy(params)

   run_config = tf.estimator.RunConfig(train_distribute=distribution,
                                       eval_distribute=distribution)
official/recommendation/ncf_input_pipeline.py

@@ -82,7 +82,6 @@ def create_dataset_from_data_producer(producer, params):
   Returns:
     Processed training features.
   """
-    labels = tf.expand_dims(labels, -1)
     fake_dup_mask = tf.zeros_like(features[movielens.USER_COLUMN])
     features[rconst.DUPLICATE_MASK] = fake_dup_mask
     features[rconst.TRAIN_LABEL_KEY] = labels

@@ -106,7 +105,6 @@ def create_dataset_from_data_producer(producer, params):
     Processed evaluation features.
   """
     labels = tf.cast(tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
-    labels = tf.expand_dims(labels, -1)
     fake_valid_pt_mask = tf.cast(
         tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
     features[rconst.VALID_POINT_MASK] = fake_valid_pt_mask

@@ -134,9 +132,13 @@ def create_ncf_input_data(params, producer=None, input_meta_data=None):
   Returns:
     (training dataset, evaluation dataset, train steps per epoch,
     eval steps per epoch)
+
+  Raises:
+    ValueError: If data is being generated online when using TPUs.
   """
   if params["train_dataset_path"]:
     assert params["eval_dataset_path"]

     train_dataset = create_dataset_from_tf_record_files(
         params["train_dataset_path"], input_meta_data["train_prebatch_size"],

@@ -148,34 +150,18 @@ def create_ncf_input_data(params, producer=None, input_meta_data=None):
         params["eval_batch_size"], is_training=False)

-    # TODO(b/259377621): Remove number of devices (i.e.
-    # params["batches_per_step"]) in input pipeline logic and only use
-    # global batch size instead.
-    num_train_steps = int(np.ceil(
-        input_meta_data["num_train_steps"] / params["batches_per_step"]))
-    num_eval_steps = (input_meta_data["num_eval_steps"] //
-                      params["batches_per_step"])
+    num_train_steps = int(input_meta_data["num_train_steps"])
+    num_eval_steps = int(input_meta_data["num_eval_steps"])
   else:
+    if params["use_tpu"]:
+      raise ValueError("TPU training does not support data producer yet. "
+                       "Use pre-processed data.")
+
     assert producer
     # Start retrieving data from producer.
     train_dataset, eval_dataset = create_dataset_from_data_producer(
         producer, params)
-    num_train_steps = (producer.train_batches_per_epoch //
-                       params["batches_per_step"])
-    num_eval_steps = (producer.eval_batches_per_epoch //
-                      params["batches_per_step"])
-    assert not producer.train_batches_per_epoch % params["batches_per_step"]
-    assert not producer.eval_batches_per_epoch % params["batches_per_step"]
-    # It is required that for distributed training, the dataset must call
-    # batch(). The parameter of batch() here is the number of replicas
-    # involved, such that each replica evenly gets a slice of data.
-    # drop_remainder = True, as we would like batch call to return a fixed
-    # shape vs None; this prevents an expensive broadcast during weighted_loss.
-    batches_per_step = params["batches_per_step"]
-    train_dataset = train_dataset.batch(batches_per_step, drop_remainder=True)
-    eval_dataset = eval_dataset.batch(batches_per_step, drop_remainder=True)
+    num_train_steps = producer.train_batches_per_epoch
+    num_eval_steps = producer.eval_batches_per_epoch

   return train_dataset, eval_dataset, num_train_steps, num_eval_steps
official/recommendation/ncf_keras_main.py

(This diff is collapsed in the page view and is not reproduced here; the file list above records +238 −199.)
official/recommendation/ncf_test.py

@@ -189,7 +189,7 @@ class NcfTest(tf.test.TestCase):
     self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                   2 * math.log(2) / math.log(4)) / 4)

-_BASE_END_TO_END_FLAGS = ['-batch_size', '1024', '-train_epochs', '1']
+_BASE_END_TO_END_FLAGS = ['-batch_size', '1044', '-train_epochs', '1']

 @unittest.skipIf(keras_utils.is_v2_0(), "TODO(b/136018594)")
 @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
official/recommendation/neumf_model.py

@@ -109,7 +109,6 @@ def neumf_model_fn(features, labels, mode, params):
     mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_EPSILON,
                             value=params["epsilon"])
     optimizer = tf.compat.v1.train.AdamOptimizer(
         learning_rate=params["learning_rate"], beta1=params["beta1"],

@@ -151,7 +150,7 @@ def _strip_first_and_last_dimension(x, batch_size):
   return tf.reshape(x[0, :], (batch_size,))

-def construct_model(user_input, item_input, params, need_strip=False):
+def construct_model(user_input, item_input, params):
   # type: (tf.Tensor, tf.Tensor, dict) -> tf.keras.Model
   """Initialize NeuMF model.

@@ -184,34 +183,33 @@ def construct_model(user_input, item_input, params, need_strip=False):
   # Initializer for embedding layers
   embedding_initializer = "glorot_uniform"

-  if need_strip:
-    batch_size = params["batch_size"]
-    user_input_reshaped = tf.keras.layers.Lambda(
-        lambda x: _strip_first_and_last_dimension(x, batch_size))(user_input)
-    item_input_reshaped = tf.keras.layers.Lambda(
-        lambda x: _strip_first_and_last_dimension(x, batch_size))(item_input)
+  def mf_slice_fn(x):
+    x = tf.squeeze(x, [1])
+    return x[:, :mf_dim]
+
+  def mlp_slice_fn(x):
+    x = tf.squeeze(x, [1])
+    return x[:, mf_dim:]

   # It turns out to be significantly more efficient to store the MF and MLP
   # embedding portions in the same table, and then slice as needed.
-  mf_slice_fn = lambda x: x[:, :mf_dim]
-  mlp_slice_fn = lambda x: x[:, mf_dim:]
   embedding_user = tf.keras.layers.Embedding(
-      num_users, mf_dim + model_layers[0] // 2,
+      num_users,
+      mf_dim + model_layers[0] // 2,
       embeddings_initializer=embedding_initializer,
       embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
-      input_length=1, name="embedding_user")(
-          user_input_reshaped if need_strip else user_input)
+      input_length=1,
+      name="embedding_user")(user_input)
   embedding_item = tf.keras.layers.Embedding(
-      num_items, mf_dim + model_layers[0] // 2,
+      num_items,
+      mf_dim + model_layers[0] // 2,
       embeddings_initializer=embedding_initializer,
       embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
-      input_length=1, name="embedding_item")(
-          item_input_reshaped if need_strip else item_input)
+      input_length=1,
+      name="embedding_item")(item_input)

   # GMF part
   mf_user_latent = tf.keras.layers.Lambda(
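A minimal sketch (not from the diff) of what the new slice functions do: squeeze out the unit axis that the (batch, 1) inputs now carry through the embedding, then split one shared embedding into its MF and MLP halves. The mf_dim and tensor values are illustrative:

import tensorflow as tf

mf_dim = 2               # hypothetical
x = tf.zeros([8, 1, 6])  # embedding output: (batch, 1, mf_dim + mlp_dim)

def mf_slice_fn(t):
  t = tf.squeeze(t, [1])  # drop the unit axis introduced by input_length=1
  return t[:, :mf_dim]

def mlp_slice_fn(t):
  t = tf.squeeze(t, [1])
  return t[:, mf_dim:]

print(mf_slice_fn(x).shape)   # (8, 2)
print(mlp_slice_fn(x).shape)  # (8, 4)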
official/utils/misc/distribution_utils.py

@@ -24,6 +24,8 @@ import random
 import string

 import tensorflow as tf

+from official.utils.misc import tpu_lib
+

 def _collective_communication(all_reduce_alg):
   """Return a CollectiveCommunication based on all_reduce_alg.

@@ -83,16 +85,18 @@ def get_distribution_strategy(distribution_strategy="default",
                               num_gpus=0,
                               num_workers=1,
                               all_reduce_alg=None,
-                              num_packs=1):
+                              num_packs=1,
+                              tpu_address=None):
   """Return a DistributionStrategy for running the model.

   Args:
     distribution_strategy: a string specifying which distribution strategy to
       use. Accepted values are 'off', 'default', 'one_device', 'mirrored',
-      'parameter_server', 'multi_worker_mirrored', case insensitive. 'off'
-      means not to use Distribution Strategy; 'default' means to choose from
+      'parameter_server', 'multi_worker_mirrored', and 'tpu' -- case
+      insensitive. 'off' means not to use Distribution Strategy; 'default'
+      means to choose from
       `MirroredStrategy`, `MultiWorkerMirroredStrategy`, or `OneDeviceStrategy`
-      according to the number of GPUs and number of workers.
+      according to the number of GPUs and number of workers. 'tpu' means to use
+      TPUStrategy using `tpu_address`.
     num_gpus: Number of GPUs to run this model.
     num_workers: Number of workers to run this model.
     all_reduce_alg: Optional. Specifies which algorithm to use when performing

@@ -102,12 +106,14 @@ def get_distribution_strategy(distribution_strategy="default",
       device topology.
     num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
       or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
+    tpu_address: Optional. String that represents TPU to connect to. Must not
+      be None if `distribution_strategy` is set to `tpu`.

   Returns:
     tf.distribute.DistibutionStrategy object.

   Raises:
     ValueError: if `distribution_strategy` is 'off' or 'one_device' and
-      `num_gpus` is larger than 1; or `num_gpus` is negative.
+      `num_gpus` is larger than 1; or `num_gpus` is negative or if
+      `distribution_strategy` is `tpu` but `tpu_address` is not specified.
   """
   if num_gpus < 0:
     raise ValueError("`num_gpus` can not be negative.")

@@ -120,6 +126,15 @@ def get_distribution_strategy(distribution_strategy="default",
           "flag cannot be set to 'off'.".format(num_gpus, num_workers))
     return None

+  if distribution_strategy == "tpu":
+    if not tpu_address:
+      raise ValueError("`tpu_address` must be specified when using "
+                       "TPUStrategy.")
+    # Initialize TPU System.
+    cluster_resolver = tpu_lib.tpu_initialize(tpu_address)
+    return tf.distribute.experimental.TPUStrategy(cluster_resolver)
+
   if distribution_strategy == "multi_worker_mirrored":
     return tf.distribute.experimental.MultiWorkerMirroredStrategy(
         communication=_collective_communication(all_reduce_alg))
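A usage sketch of the new 'tpu' branch (illustrative only; the TPU address is a placeholder and would normally come from a flag):

from official.utils.misc import distribution_utils

strategy = distribution_utils.get_distribution_strategy(
    distribution_strategy="tpu", tpu_address="grpc://10.0.0.2:8470")
with strategy.scope():
  pass  # model construction would go here, with variables placed per the strategy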
official/bert/tpu_lib.py → official/utils/misc/tpu_lib.py

@@ -31,3 +31,8 @@ def tpu_initialize(tpu_address):
   tf.config.experimental_connect_to_host(cluster_resolver.master())
   tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
   return cluster_resolver

+
+def get_primary_cpu_task(use_remote_tpu=False):
+  """Returns remote TPU worker address. No-op for GPU/CPU training."""
+  return "/job:worker" if use_remote_tpu else ""
research/lstm_object_detection/README.md

@@ -32,3 +32,8 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:rLqvkztmWYgJ:scholar.go
 * yinxiao@google.com
 * menglong@google.com
 * yongzhe@google.com

+## Table of Contents
+
+* <a href='g3doc/exporting_models.md'>Exporting a trained model</a>
research/lstm_object_detection/configs/lstm_ssd_interleaved_mobilenet_v2_imagenet.config (new file)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# For training on Imagenet Video with LSTM Interleaved Mobilenet V2

[lstm_object_detection.protos.lstm_model] {
  train_unroll_length: 4
  eval_unroll_length: 4
  lstm_state_depth: 320
  depth_multipliers: 1.4
  depth_multipliers: 0.35
  pre_bottleneck: true
  low_res: true
  train_interleave_method: 'RANDOM_SKIP_SMALL'
  eval_interleave_method: 'SKIP3'
}

model {
  ssd {
    num_classes: 30  # Num of class for imagenet vid dataset.
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 5
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 320
        width: 320
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 3
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
        use_depthwise: true
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'lstm_ssd_interleaved_mobilenet_v2'
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 4.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 8
  optimizer {
    use_moving_average: false
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.002
          decay_steps: 200000
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  gradient_clipping_by_norm: 10.0
  batch_queue_capacity: 12
  prefetch_queue_capacity: 4
}

train_input_reader: {
  shuffle_buffer_size: 32
  queue_capacity: 12
  prefetch_size: 12
  min_after_dequeue: 4
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: '/data/lstm_detection/tfrecords/test.tfrecord'
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
}

eval_config: {
  metrics_set: "coco_evaluation_all_frames"
  use_moving_averages: true
  min_score_threshold: 0.5
  max_num_boxes_to_visualize: 300
  visualize_groundtruth_boxes: true
  groundtruth_box_visualization_color: "red"
}

eval_input_reader {
  label_map_path: "path/to/label_map"
  shuffle: true
  num_epochs: 1
  num_parallel_batches: 1
  num_readers: 1
  external_input_reader {
    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "path/to/sequence_example/data"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 10
      }
    }
  }
}

eval_input_reader: {
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "path/to/sequence_example/data"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
  shuffle: true
  num_readers: 1
}
research/lstm_object_detection/eval.py

@@ -27,8 +27,6 @@ import functools
 import os
 import tensorflow as tf
 from google.protobuf import text_format
-from google3.pyglib import app
-from google3.pyglib import flags
 from lstm_object_detection import evaluator
 from lstm_object_detection import model_builder
 from lstm_object_detection.inputs import seq_dataset_builder

@@ -107,4 +105,4 @@ def main(unused_argv):
       FLAGS.checkpoint_dir, FLAGS.eval_dir)

 if __name__ == '__main__':
-  app.run()
+  tf.app.run()
research/lstm_object_detection/export_tflite_lstd_graph.py (new file)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Exports an LSTM detection model to use with tf-lite.

Outputs file:
* A tflite compatible frozen graph - $output_directory/tflite_graph.pb

The exported graph has the following input and output nodes.

Inputs:
'input_video_tensor': a float32 tensor of shape
[unroll_length, height, width, 3] containing the normalized input image.
Note that the height and width must be compatible with the height and
width configured in the fixed_shape_image resizer options in the pipeline
config proto.

Outputs:
If add_postprocessing_op is true: the frozen graph adds a
  TFLite_Detection_PostProcess custom op node that has four outputs:
  detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
  locations
  detection_classes: a float32 tensor of shape [1, num_boxes]
  with class indices
  detection_scores: a float32 tensor of shape [1, num_boxes]
  with class scores
  num_boxes: a float32 tensor of size 1 containing the number of detected
  boxes
else:
  the graph has three outputs:
  'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
  containing the encoded box predictions.
  'raw_outputs/class_predictions': a float32 tensor of shape
  [1, num_anchors, num_classes] containing the class scores for each anchor
  after applying score conversion.
  'anchors': a float32 constant tensor of shape [num_anchors, 4]
  containing the anchor boxes.

Example Usage:
--------------
python lstm_object_detection/export_tflite_lstd_graph.py \
    --pipeline_config_path path/to/lstm_pipeline.config \
    --trained_checkpoint_prefix path/to/model.ckpt \
    --output_directory path/to/exported_model_directory

The expected output would be in the directory
path/to/exported_model_directory (which is created if it does not exist)
with contents:
 - tflite_graph.pbtxt
 - tflite_graph.pb

Config overrides (see the `config_override` flag) are text protobufs
(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
certain fields in the provided pipeline_config_path. These are useful for
making small changes to the inference graph that differ from the training or
eval config.

Example Usage (in which we change the NMS iou_threshold to be 0.5 and
NMS score_threshold to be 0.0):
python lstm_object_detection/export_tflite_lstd_graph.py \
    --pipeline_config_path path/to/lstm_pipeline.config \
    --trained_checkpoint_prefix path/to/model.ckpt \
    --output_directory path/to/exported_model_directory \
    --config_override " \
        model{ \
          ssd{ \
            post_processing { \
              batch_non_max_suppression { \
                score_threshold: 0.0 \
                iou_threshold: 0.5 \
              } \
            } \
          } \
        } \
        "
"""

import tensorflow as tf

from lstm_object_detection.utils import config_util
from lstm_object_detection import export_tflite_lstd_graph_lib

flags = tf.app.flags
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
flags.DEFINE_string(
    'pipeline_config_path', None,
    'Path to a pipeline_pb2.TrainEvalPipelineConfig config file.')
flags.DEFINE_string('trained_checkpoint_prefix', None, 'Checkpoint prefix.')
flags.DEFINE_integer('max_detections', 10,
                     'Maximum number of detections (boxes) to show.')
flags.DEFINE_integer('max_classes_per_detection', 1,
                     'Maximum number of classes to output per detection box.')
flags.DEFINE_integer(
    'detections_per_class', 100,
    'Number of anchors used per class in Regular Non-Max-Suppression.')
flags.DEFINE_bool('add_postprocessing_op', True,
                  'Add TFLite custom op for postprocessing to the graph.')
flags.DEFINE_bool(
    'use_regular_nms', False,
    'Flag to set postprocessing op to use Regular NMS instead of Fast NMS.')
flags.DEFINE_string(
    'config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig '
    'text proto to override pipeline_config_path.')

FLAGS = flags.FLAGS


def main(argv):
  del argv  # Unused.
  flags.mark_flag_as_required('output_directory')
  flags.mark_flag_as_required('pipeline_config_path')
  flags.mark_flag_as_required('trained_checkpoint_prefix')

  pipeline_config = config_util.get_configs_from_pipeline_file(
      FLAGS.pipeline_config_path)

  export_tflite_lstd_graph_lib.export_tflite_graph(
      pipeline_config,
      FLAGS.trained_checkpoint_prefix,
      FLAGS.output_directory,
      FLAGS.add_postprocessing_op,
      FLAGS.max_detections,
      FLAGS.max_classes_per_detection,
      use_regular_nms=FLAGS.use_regular_nms)


if __name__ == '__main__':
  tf.app.run(main)
research/lstm_object_detection/export_tflite_lstd_graph_lib.py (new file)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Exports detection models to use with tf-lite.

See export_tflite_lstd_graph.py for usage.
"""
import os
import tempfile
import numpy as np
import tensorflow as tf

from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import types_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import post_processing_builder
from object_detection.core import box_list
from lstm_object_detection import model_builder

_DEFAULT_NUM_CHANNELS = 3
_DEFAULT_NUM_COORD_BOX = 4


def get_const_center_size_encoded_anchors(anchors):
  """Exports center-size encoded anchors as a constant tensor.

  Args:
    anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor
      boxes

  Returns:
    encoded_anchors: a float32 constant tensor of shape [num_anchors, 4]
      containing the anchor boxes.
  """
  anchor_boxlist = box_list.BoxList(anchors)
  y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes()
  num_anchors = y.get_shape().as_list()

  with tf.Session() as sess:
    y_out, x_out, h_out, w_out = sess.run([y, x, h, w])
    encoded_anchors = tf.constant(
        np.transpose(np.stack((y_out, x_out, h_out, w_out))),
        dtype=tf.float32,
        shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX],
        name='anchors')
  return encoded_anchors


def append_postprocessing_op(frozen_graph_def,
                             max_detections,
                             max_classes_per_detection,
                             nms_score_threshold,
                             nms_iou_threshold,
                             num_classes,
                             scale_values,
                             detections_per_class=100,
                             use_regular_nms=False):
  """Appends postprocessing custom op.

  Args:
    frozen_graph_def: Frozen GraphDef for SSD model after freezing the
      checkpoint
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection
    nms_score_threshold: Score threshold used in Non-maximal suppression in
      post-processing
    nms_iou_threshold: Intersection-over-union threshold used in Non-maximal
      suppression in post-processing
    num_classes: number of classes in SSD detector
    scale_values: scale values is a dict with following key-value pairs
      {y_scale: 10, x_scale: 10, h_scale: 5, w_scale: 5} that are used in
      decode centersize boxes
    detections_per_class: In regular NonMaxSuppression, number of anchors used
      for NonMaxSuppression per class
    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
      of Fast NMS.

  Returns:
    transformed_graph_def: Frozen GraphDef with postprocessing custom op
      appended. The TFLite_Detection_PostProcess custom op node has four
      outputs:
      detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
      locations
      detection_classes: a float32 tensor of shape [1, num_boxes]
      with class indices
      detection_scores: a float32 tensor of shape [1, num_boxes]
      with class scores
      num_boxes: a float32 tensor of size 1 containing the number of detected
      boxes
  """
  new_output = frozen_graph_def.node.add()
  new_output.op = 'TFLite_Detection_PostProcess'
  new_output.name = 'TFLite_Detection_PostProcess'
  new_output.attr['_output_quantized'].CopyFrom(
      attr_value_pb2.AttrValue(b=True))
  new_output.attr['_output_types'].list.type.extend([
      types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, types_pb2.DT_FLOAT,
      types_pb2.DT_FLOAT
  ])
  new_output.attr['_support_output_type_float_in_quantized_op'].CopyFrom(
      attr_value_pb2.AttrValue(b=True))
  new_output.attr['max_detections'].CopyFrom(
      attr_value_pb2.AttrValue(i=max_detections))
  new_output.attr['max_classes_per_detection'].CopyFrom(
      attr_value_pb2.AttrValue(i=max_classes_per_detection))
  new_output.attr['nms_score_threshold'].CopyFrom(
      attr_value_pb2.AttrValue(f=nms_score_threshold.pop()))
  new_output.attr['nms_iou_threshold'].CopyFrom(
      attr_value_pb2.AttrValue(f=nms_iou_threshold.pop()))
  new_output.attr['num_classes'].CopyFrom(
      attr_value_pb2.AttrValue(i=num_classes))
  new_output.attr['y_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['y_scale'].pop()))
  new_output.attr['x_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['x_scale'].pop()))
  new_output.attr['h_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['h_scale'].pop()))
  new_output.attr['w_scale'].CopyFrom(
      attr_value_pb2.AttrValue(f=scale_values['w_scale'].pop()))
  new_output.attr['detections_per_class'].CopyFrom(
      attr_value_pb2.AttrValue(i=detections_per_class))
  new_output.attr['use_regular_nms'].CopyFrom(
      attr_value_pb2.AttrValue(b=use_regular_nms))
  new_output.input.extend(
      ['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors'])

  # Transform the graph to append new postprocessing op
  input_names = []
  output_names = ['TFLite_Detection_PostProcess']
  transforms = ['strip_unused_nodes']
  transformed_graph_def = TransformGraph(frozen_graph_def, input_names,
                                         output_names, transforms)
  return transformed_graph_def


def export_tflite_graph(pipeline_config,
                        trained_checkpoint_prefix,
                        output_dir,
                        add_postprocessing_op,
                        max_detections,
                        max_classes_per_detection,
                        detections_per_class=100,
                        use_regular_nms=False,
                        binary_graph_name='tflite_graph.pb',
                        txt_graph_name='tflite_graph.pbtxt'):
  """Exports a tflite compatible graph and anchors for ssd detection model.

  Anchors are written to a tensor and tflite compatible graph
  is written to output_dir/tflite_graph.pb.

  Args:
    pipeline_config: Dictionary of configuration objects. Keys are `model`,
      `train_config`, `train_input_config`, `eval_config`,
      `eval_input_config`, `lstm_model`. Values are the corresponding config
      objects.
    trained_checkpoint_prefix: a file prefix for the checkpoint containing the
      trained parameters of the SSD model.
    output_dir: A directory to write the tflite graph and anchor file to.
    add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds
      a TFLite_Detection_PostProcess custom op
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection
    detections_per_class: In regular NonMaxSuppression, number of anchors used
      for NonMaxSuppression per class
    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
      of Fast NMS.
    binary_graph_name: Name of the exported graph file in binary format.
    txt_graph_name: Name of the exported graph file in text format.

  Raises:
    ValueError: if the pipeline config contains models other than ssd or uses
      a fixed_shape_resizer and provides a shape as well.
  """
  model_config = pipeline_config['model']
  lstm_config = pipeline_config['lstm_model']
  eval_config = pipeline_config['eval_config']
  tf.gfile.MakeDirs(output_dir)
  if model_config.WhichOneof('model') != 'ssd':
    raise ValueError('Only ssd models are supported in tflite. '
                     'Found {} in config'.format(
                         model_config.WhichOneof('model')))

  num_classes = model_config.ssd.num_classes
  nms_score_threshold = {
      model_config.ssd.post_processing.batch_non_max_suppression.
      score_threshold
  }
  nms_iou_threshold = {
      model_config.ssd.post_processing.batch_non_max_suppression.iou_threshold
  }
  scale_values = {}
  scale_values['y_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.y_scale
  }
  scale_values['x_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.x_scale
  }
  scale_values['h_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.height_scale
  }
  scale_values['w_scale'] = {
      model_config.ssd.box_coder.faster_rcnn_box_coder.width_scale
  }

  image_resizer_config = model_config.ssd.image_resizer
  image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
  num_channels = _DEFAULT_NUM_CHANNELS
  if image_resizer == 'fixed_shape_resizer':
    height = image_resizer_config.fixed_shape_resizer.height
    width = image_resizer_config.fixed_shape_resizer.width
    if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
      num_channels = 1
    # TODO(richardbrks): figure out how to make with a None defined batch size
    shape = [lstm_config.eval_unroll_length, height, width, num_channels]
  else:
    raise ValueError(
        'Only fixed_shape_resizer '
        'is supported with tflite. Found {}'.format(
            image_resizer_config.WhichOneof('image_resizer_oneof')))

  video_tensor = tf.placeholder(
      tf.float32, shape=shape, name='input_video_tensor')

  detection_model = model_builder.build(
      model_config, lstm_config, is_training=False)
  preprocessed_video, true_image_shapes = detection_model.preprocess(
      tf.to_float(video_tensor))
  predicted_tensors = detection_model.predict(preprocessed_video,
                                              true_image_shapes)
  # predicted_tensors = detection_model.postprocess(predicted_tensors,
  #                                                 true_image_shapes)
  # The score conversion occurs before the post-processing custom op
  _, score_conversion_fn = post_processing_builder.build(
      model_config.ssd.post_processing)
  class_predictions = score_conversion_fn(
      predicted_tensors['class_predictions_with_background'])

  with tf.name_scope('raw_outputs'):
    # 'raw_outputs/box_encodings': a float32 tensor of shape
    # [1, num_anchors, 4] containing the encoded box predictions. Note that
    # these are raw predictions and no Non-Max suppression is applied on them
    # and no decode center size boxes is applied to them.
    tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
    # 'raw_outputs/class_predictions': a float32 tensor of shape
    # [1, num_anchors, num_classes] containing the class scores for each
    # anchor after applying score conversion.
    tf.identity(class_predictions, name='class_predictions')
  # 'anchors': a float32 tensor of shape
  # [4, num_anchors] containing the anchors as a constant node.
  tf.identity(
      get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
      name='anchors')

  # Add global step to the graph, so we know the training step number when we
  # evaluate the model.
  tf.train.get_or_create_global_step()

  # graph rewriter
  is_quantized = ('graph_rewriter' in pipeline_config)
  if is_quantized:
    graph_rewriter_config = pipeline_config['graph_rewriter']
    graph_rewriter_fn = graph_rewriter_builder.build(
        graph_rewriter_config, is_training=False, is_export=True)
    graph_rewriter_fn()

  if model_config.ssd.feature_extractor.HasField('fpn'):
    exporter.rewrite_nn_resize_op(is_quantized)

  # freeze the graph
  saver_kwargs = {}
  if eval_config.use_moving_averages:
    saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
    moving_average_checkpoint = tempfile.NamedTemporaryFile()
    exporter.replace_variable_values_with_moving_averages(
        tf.get_default_graph(), trained_checkpoint_prefix,
        moving_average_checkpoint.name)
    checkpoint_to_use = moving_average_checkpoint.name
  else:
    checkpoint_to_use = trained_checkpoint_prefix

  saver = tf.train.Saver(**saver_kwargs)
  input_saver_def = saver.as_saver_def()
  frozen_graph_def = exporter.freeze_graph_with_def_protos(
      input_graph_def=tf.get_default_graph().as_graph_def(),
      input_saver_def=input_saver_def,
      input_checkpoint=checkpoint_to_use,
      output_node_names=','.join([
          'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
          'anchors'
      ]),
      restore_op_name='save/restore_all',
      filename_tensor_name='save/Const:0',
      clear_devices=True,
      output_graph='',
      initializer_nodes='')

  # Add new operation to do post processing in a custom op (TF Lite only)
  # TODO(richardbrks): Do we use this or detection_model.postprocess?
  if add_postprocessing_op:
    transformed_graph_def = append_postprocessing_op(
        frozen_graph_def, max_detections, max_classes_per_detection,
        nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
        detections_per_class, use_regular_nms)
  else:
    # Return frozen graph without adding post-processing custom op
    transformed_graph_def = frozen_graph_def
  binary_graph = os.path.join(output_dir, binary_graph_name)
  with tf.gfile.GFile(binary_graph, 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())
  txt_graph = os.path.join(output_dir, txt_graph_name)
  with tf.gfile.GFile(txt_graph, 'w') as f:
    f.write(str(transformed_graph_def))