dcuai/dlexamples - Commits

Commit cf66c525, authored Apr 15, 2022 by qianyj (parent 6b6f8b0c)

Commit message: update some TF file
Changes: 264 of 264+ changed files are displayed; this page shows 20 changed files with 4574 additions and 0 deletions (+4574, -0).
Changed files (all new, under TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/):

- bert/configs.py (+104, -0)
- bert/export_tfhub.py (+139, -0)
- bert/export_tfhub_test.py (+108, -0)
- bert/input_pipeline.py (+302, -0)
- bert/model_saving_utils.py (+68, -0)
- bert/model_training_utils.py (+590, -0)
- bert/model_training_utils_test.py (+306, -0)
- bert/run_classifier.py (+516, -0)
- bert/run_pretraining.py (+218, -0)
- bert/run_squad.py (+148, -0)
- bert/run_squad_helper.py (+472, -0)
- bert/serving.py (+133, -0)
- bert/squad_evaluate_v1_1.py (+106, -0)
- bert/squad_evaluate_v2_0.py (+249, -0)
- bert/tf1_checkpoint_converter_lib.py (+201, -0)
- bert/tf2_encoder_checkpoint_converter.py (+160, -0)
- bert/tokenization.py (+541, -0)
- bert/tokenization_test.py (+156, -0)
- configs/__init__.py (+14, -0)
- configs/bert.py (+43, -0)
File: TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/configs.py (new file, mode 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The main BERT model and related functions."""
import copy
import json
import six
import tensorflow as tf


class BertConfig(object):
  """Configuration for `BertModel`."""

  def __init__(self,
               vocab_size,
               hidden_size=768,
               num_hidden_layers=12,
               num_attention_heads=12,
               intermediate_size=3072,
               hidden_act="gelu",
               hidden_dropout_prob=0.1,
               attention_probs_dropout_prob=0.1,
               max_position_embeddings=512,
               type_vocab_size=16,
               initializer_range=0.02,
               embedding_size=None,
               backward_compatible=True):
    """Constructs BertConfig.

    Args:
      vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
      hidden_size: Size of the encoder layers and the pooler layer.
      num_hidden_layers: Number of hidden layers in the Transformer encoder.
      num_attention_heads: Number of attention heads for each attention layer
        in the Transformer encoder.
      intermediate_size: The size of the "intermediate" (i.e., feed-forward)
        layer in the Transformer encoder.
      hidden_act: The non-linear activation function (function or string) in
        the encoder and pooler.
      hidden_dropout_prob: The dropout probability for all fully connected
        layers in the embeddings, encoder, and pooler.
      attention_probs_dropout_prob: The dropout ratio for the attention
        probabilities.
      max_position_embeddings: The maximum sequence length that this model
        might ever be used with. Typically set this to something large just in
        case (e.g., 512 or 1024 or 2048).
      type_vocab_size: The vocabulary size of the `token_type_ids` passed into
        `BertModel`.
      initializer_range: The stdev of the truncated_normal_initializer for
        initializing all weight matrices.
      embedding_size: (Optional) width of the factorized word embeddings.
      backward_compatible: Boolean, whether the variable shapes are compatible
        with checkpoints converted from TF 1.x BERT.
    """
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.num_hidden_layers = num_hidden_layers
    self.num_attention_heads = num_attention_heads
    self.hidden_act = hidden_act
    self.intermediate_size = intermediate_size
    self.hidden_dropout_prob = hidden_dropout_prob
    self.attention_probs_dropout_prob = attention_probs_dropout_prob
    self.max_position_embeddings = max_position_embeddings
    self.type_vocab_size = type_vocab_size
    self.initializer_range = initializer_range
    self.embedding_size = embedding_size
    self.backward_compatible = backward_compatible

  @classmethod
  def from_dict(cls, json_object):
    """Constructs a `BertConfig` from a Python dictionary of parameters."""
    config = BertConfig(vocab_size=None)
    for (key, value) in six.iteritems(json_object):
      config.__dict__[key] = value
    return config

  @classmethod
  def from_json_file(cls, json_file):
    """Constructs a `BertConfig` from a json file of parameters."""
    with tf.io.gfile.GFile(json_file, "r") as reader:
      text = reader.read()
    return cls.from_dict(json.loads(text))

  def to_dict(self):
    """Serializes this instance to a Python dictionary."""
    output = copy.deepcopy(self.__dict__)
    return output

  def to_json_string(self):
    """Serializes this instance to a JSON string."""
    return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
File: TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/export_tfhub.py (new file, mode 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A script to export BERT as a TF-Hub SavedModel.
This script is **DEPRECATED** for exporting BERT encoder models;
see the error message in by main() for details.
"""
from typing import Text

# Import libraries
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf

from official.nlp.bert import bert_models
from official.nlp.bert import configs

FLAGS = flags.FLAGS

flags.DEFINE_string("bert_config_file", None,
                    "Bert configuration file to define core bert layers.")
flags.DEFINE_string("model_checkpoint_path", None,
                    "File path to TF model checkpoint.")
flags.DEFINE_string("export_path", None,
                    "TF-Hub SavedModel destination path.")
flags.DEFINE_string("vocab_file", None,
                    "The vocabulary file that the BERT model was trained on.")
flags.DEFINE_bool("do_lower_case", None,
                  "Whether to lowercase. If None, "
                  "do_lower_case will be enabled if 'uncased' appears in the "
                  "name of --vocab_file")
flags.DEFINE_enum("model_type", "encoder", ["encoder", "squad"],
                  "What kind of BERT model to export.")


def create_bert_model(bert_config: configs.BertConfig) -> tf.keras.Model:
  """Creates a BERT keras core model from BERT configuration.

  Args:
    bert_config: A `BertConfig` to create the core model.

  Returns:
    A keras model.
  """
  # Adds input layers just as placeholders.
  input_word_ids = tf.keras.layers.Input(
      shape=(None,), dtype=tf.int32, name="input_word_ids")
  input_mask = tf.keras.layers.Input(
      shape=(None,), dtype=tf.int32, name="input_mask")
  input_type_ids = tf.keras.layers.Input(
      shape=(None,), dtype=tf.int32, name="input_type_ids")
  transformer_encoder = bert_models.get_transformer_encoder(
      bert_config, sequence_length=None)
  sequence_output, pooled_output = transformer_encoder(
      [input_word_ids, input_mask, input_type_ids])
  # To keep consistent with legacy hub modules, the outputs are
  # "pooled_output" and "sequence_output".
  return tf.keras.Model(
      inputs=[input_word_ids, input_mask, input_type_ids],
      outputs=[pooled_output, sequence_output]), transformer_encoder


def export_bert_tfhub(bert_config: configs.BertConfig,
                      model_checkpoint_path: Text,
                      hub_destination: Text,
                      vocab_file: Text,
                      do_lower_case: bool = None):
  """Restores a tf.keras.Model and saves for TF-Hub."""
  # If do_lower_case is not explicit, default to checking whether "uncased" is
  # in the vocab file name
  if do_lower_case is None:
    do_lower_case = "uncased" in vocab_file
    logging.info("Using do_lower_case=%s based on name of vocab_file=%s",
                 do_lower_case, vocab_file)
  core_model, encoder = create_bert_model(bert_config)
  checkpoint = tf.train.Checkpoint(
      model=encoder,  # Legacy checkpoints.
      encoder=encoder)
  checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched()
  core_model.vocab_file = tf.saved_model.Asset(vocab_file)
  core_model.do_lower_case = tf.Variable(do_lower_case, trainable=False)
  core_model.save(hub_destination, include_optimizer=False, save_format="tf")


def export_bert_squad_tfhub(bert_config: configs.BertConfig,
                            model_checkpoint_path: Text,
                            hub_destination: Text,
                            vocab_file: Text,
                            do_lower_case: bool = None):
  """Restores a tf.keras.Model for BERT with SQuAD and saves for TF-Hub."""
  # If do_lower_case is not explicit, default to checking whether "uncased" is
  # in the vocab file name
  if do_lower_case is None:
    do_lower_case = "uncased" in vocab_file
    logging.info("Using do_lower_case=%s based on name of vocab_file=%s",
                 do_lower_case, vocab_file)
  span_labeling, _ = bert_models.squad_model(bert_config, max_seq_length=None)
  checkpoint = tf.train.Checkpoint(model=span_labeling)
  checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched()
  span_labeling.vocab_file = tf.saved_model.Asset(vocab_file)
  span_labeling.do_lower_case = tf.Variable(do_lower_case, trainable=False)
  span_labeling.save(
      hub_destination, include_optimizer=False, save_format="tf")


def main(_):
  bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  if FLAGS.model_type == "encoder":
    deprecation_note = (
        "nlp/bert/export_tfhub is **DEPRECATED** for exporting BERT encoder "
        "models. Please switch to nlp/tools/export_tfhub for exporting BERT "
        "(and other) encoders with dict inputs/outputs conforming to "
        "https://www.tensorflow.org/hub/common_saved_model_apis/text#transformer-encoders"
    )
    logging.error(deprecation_note)
    print("\n\nNOTICE:", deprecation_note, "\n")
    export_bert_tfhub(bert_config, FLAGS.model_checkpoint_path,
                      FLAGS.export_path, FLAGS.vocab_file, FLAGS.do_lower_case)
  elif FLAGS.model_type == "squad":
    export_bert_squad_tfhub(bert_config, FLAGS.model_checkpoint_path,
                            FLAGS.export_path, FLAGS.vocab_file,
                            FLAGS.do_lower_case)
  else:
    raise ValueError("Unsupported model_type %s." % FLAGS.model_type)


if __name__ == "__main__":
  app.run(main)
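For context, a consumer-side sketch of loading the exported SavedModel back as a Keras layer, mirroring what the test file below verifies; the "/tmp/bert_hub" path is hypothetical:

import numpy as np
import tensorflow_hub as hub

# Load the module written by export_bert_tfhub(); the path is hypothetical.
hub_layer = hub.KerasLayer("/tmp/bert_hub", trainable=True)

# Inputs are [input_word_ids, input_mask, input_type_ids]; outputs are
# [pooled_output, sequence_output], as wired up in create_bert_model().
ids = np.zeros((1, 8), dtype=np.int32)
pooled_output, sequence_output = hub_layer([ids, np.ones_like(ids), ids])
print(pooled_output.shape, sequence_output.shape)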
File: TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/export_tfhub_test.py (new file, mode 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests official.nlp.bert.export_tfhub."""
import os

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

from official.nlp.bert import configs
from official.nlp.bert import export_tfhub


class ExportTfhubTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters("model", "encoder")
  def test_export_tfhub(self, ckpt_key_name):
    # Exports a savedmodel for TF-Hub
    hidden_size = 16
    bert_config = configs.BertConfig(
        vocab_size=100,
        hidden_size=hidden_size,
        intermediate_size=32,
        max_position_embeddings=128,
        num_attention_heads=2,
        num_hidden_layers=1)
    bert_model, encoder = export_tfhub.create_bert_model(bert_config)
    model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")
    checkpoint = tf.train.Checkpoint(**{ckpt_key_name: encoder})
    checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
    model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir)

    vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt")
    with tf.io.gfile.GFile(vocab_file, "w") as f:
      f.write("dummy content")

    hub_destination = os.path.join(self.get_temp_dir(), "hub")
    export_tfhub.export_bert_tfhub(bert_config, model_checkpoint_path,
                                   hub_destination, vocab_file)

    # Restores a hub KerasLayer.
    hub_layer = hub.KerasLayer(hub_destination, trainable=True)

    if hasattr(hub_layer, "resolved_object"):
      # Checks meta attributes.
      self.assertTrue(hub_layer.resolved_object.do_lower_case.numpy())
      with tf.io.gfile.GFile(
          hub_layer.resolved_object.vocab_file.asset_path.numpy()) as f:
        self.assertEqual("dummy content", f.read())
    # Checks the hub KerasLayer.
    for source_weight, hub_weight in zip(bert_model.trainable_weights,
                                         hub_layer.trainable_weights):
      self.assertAllClose(source_weight.numpy(), hub_weight.numpy())

    seq_length = 10
    dummy_ids = np.zeros((2, seq_length), dtype=np.int32)
    hub_outputs = hub_layer([dummy_ids, dummy_ids, dummy_ids])
    source_outputs = bert_model([dummy_ids, dummy_ids, dummy_ids])

    # The outputs of hub module are "pooled_output" and "sequence_output",
    # while the outputs of encoder is in reversed order, i.e.,
    # "sequence_output" and "pooled_output".
    encoder_outputs = reversed(encoder([dummy_ids, dummy_ids, dummy_ids]))
    self.assertEqual(hub_outputs[0].shape, (2, hidden_size))
    self.assertEqual(hub_outputs[1].shape, (2, seq_length, hidden_size))
    for source_output, hub_output, encoder_output in zip(
        source_outputs, hub_outputs, encoder_outputs):
      self.assertAllClose(source_output.numpy(), hub_output.numpy())
      self.assertAllClose(source_output.numpy(), encoder_output.numpy())

    # Test that training=True makes a difference (activates dropout).
    def _dropout_mean_stddev(training, num_runs=20):
      input_ids = np.array([[14, 12, 42, 95, 99]], np.int32)
      inputs = [input_ids, np.ones_like(input_ids), np.zeros_like(input_ids)]
      outputs = np.concatenate(
          [hub_layer(inputs, training=training)[0] for _ in range(num_runs)])
      return np.mean(np.std(outputs, axis=0))

    self.assertLess(_dropout_mean_stddev(training=False), 1e-6)
    self.assertGreater(_dropout_mean_stddev(training=True), 1e-3)

    # Test propagation of seq_length in shape inference.
    input_word_ids = tf.keras.layers.Input(
        shape=(seq_length,), dtype=tf.int32)
    input_mask = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32)
    input_type_ids = tf.keras.layers.Input(
        shape=(seq_length,), dtype=tf.int32)
    pooled_output, sequence_output = hub_layer(
        [input_word_ids, input_mask, input_type_ids])
    self.assertEqual(pooled_output.shape.as_list(), [None, hidden_size])
    self.assertEqual(sequence_output.shape.as_list(),
                     [None, seq_length, hidden_size])


if __name__ == "__main__":
  tf.test.main()
File: TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/input_pipeline.py (new file, mode 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BERT model input pipelines."""
import tensorflow as tf


def decode_record(record, name_to_features):
  """Decodes a record to a TensorFlow example."""
  example = tf.io.parse_single_example(record, name_to_features)

  # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
  # So cast all int64 to int32.
  for name in list(example.keys()):
    t = example[name]
    if t.dtype == tf.int64:
      t = tf.cast(t, tf.int32)
    example[name] = t

  return example


def single_file_dataset(input_file, name_to_features, num_samples=None):
  """Creates a single-file dataset to be passed for BERT custom training."""
  # For training, we want a lot of parallel reading and shuffling.
  # For eval, we want no shuffling and parallel reading doesn't matter.
  d = tf.data.TFRecordDataset(input_file)
  if num_samples:
    d = d.take(num_samples)
  d = d.map(
      lambda record: decode_record(record, name_to_features),
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # When `input_file` is a path to a single file or a list
  # containing a single path, disable auto sharding so that
  # same input file is sent to all workers.
  if isinstance(input_file, str) or len(input_file) == 1:
    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = (
        tf.data.experimental.AutoShardPolicy.OFF)
    d = d.with_options(options)
  return d


def create_pretrain_dataset(input_patterns,
                            seq_length,
                            max_predictions_per_seq,
                            batch_size,
                            is_training=True,
                            input_pipeline_context=None,
                            use_next_sentence_label=True,
                            use_position_id=False,
                            output_fake_labels=True):
  """Creates input dataset from (tf)records files for pretraining."""
  name_to_features = {
      'input_ids':
          tf.io.FixedLenFeature([seq_length], tf.int64),
      'input_mask':
          tf.io.FixedLenFeature([seq_length], tf.int64),
      'segment_ids':
          tf.io.FixedLenFeature([seq_length], tf.int64),
      'masked_lm_positions':
          tf.io.FixedLenFeature([max_predictions_per_seq], tf.int64),
      'masked_lm_ids':
          tf.io.FixedLenFeature([max_predictions_per_seq], tf.int64),
      'masked_lm_weights':
          tf.io.FixedLenFeature([max_predictions_per_seq], tf.float32),
  }
  if use_next_sentence_label:
    name_to_features['next_sentence_labels'] = tf.io.FixedLenFeature(
        [1], tf.int64)
  if use_position_id:
    name_to_features['position_ids'] = tf.io.FixedLenFeature(
        [seq_length], tf.int64)
  for input_pattern in input_patterns:
    if not tf.io.gfile.glob(input_pattern):
      raise ValueError('%s does not match any files.' % input_pattern)

  dataset = tf.data.Dataset.list_files(input_patterns, shuffle=is_training)

  if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1:
    dataset = dataset.shard(input_pipeline_context.num_input_pipelines,
                            input_pipeline_context.input_pipeline_id)
  if is_training:
    dataset = dataset.repeat()

    # We set shuffle buffer to exactly match total number of
    # training files to ensure that training data is well shuffled.
    input_files = []
    for input_pattern in input_patterns:
      input_files.extend(tf.io.gfile.glob(input_pattern))
    dataset = dataset.shuffle(len(input_files))

  # In parallel, create tf record dataset for each train files.
  # cycle_length = 8 means that up to 8 files will be read and deserialized in
  # parallel. You may want to increase this number if you have a large number
  # of CPU cores.
  dataset = dataset.interleave(
      tf.data.TFRecordDataset,
      cycle_length=8,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  if is_training:
    dataset = dataset.shuffle(100)

  decode_fn = lambda record: decode_record(record, name_to_features)
  dataset = dataset.map(
      decode_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)

  def _select_data_from_record(record):
    """Filter out features to use for pretraining."""
    x = {
        'input_word_ids': record['input_ids'],
        'input_mask': record['input_mask'],
        'input_type_ids': record['segment_ids'],
        'masked_lm_positions': record['masked_lm_positions'],
        'masked_lm_ids': record['masked_lm_ids'],
        'masked_lm_weights': record['masked_lm_weights'],
    }
    if use_next_sentence_label:
      x['next_sentence_labels'] = record['next_sentence_labels']
    if use_position_id:
      x['position_ids'] = record['position_ids']

    # TODO(hongkuny): Remove the fake labels after migrating bert pretraining.
    if output_fake_labels:
      return (x, record['masked_lm_weights'])
    else:
      return x

  dataset = dataset.map(
      _select_data_from_record,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)
  dataset = dataset.batch(batch_size, drop_remainder=is_training)
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
  return dataset


def create_classifier_dataset(file_path,
                              seq_length,
                              batch_size,
                              is_training=True,
                              input_pipeline_context=None,
                              label_type=tf.int64,
                              include_sample_weights=False,
                              num_samples=None):
  """Creates input dataset from (tf)records files for train/eval."""
  name_to_features = {
      'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
      'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),
      'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
      'label_ids': tf.io.FixedLenFeature([], label_type),
  }
  if include_sample_weights:
    name_to_features['weight'] = tf.io.FixedLenFeature([], tf.float32)
  dataset = single_file_dataset(file_path, name_to_features,
                                num_samples=num_samples)

  # The dataset is always sharded by number of hosts.
  # num_input_pipelines is the number of hosts rather than number of cores.
  if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1:
    dataset = dataset.shard(input_pipeline_context.num_input_pipelines,
                            input_pipeline_context.input_pipeline_id)

  def _select_data_from_record(record):
    x = {
        'input_word_ids': record['input_ids'],
        'input_mask': record['input_mask'],
        'input_type_ids': record['segment_ids']
    }
    y = record['label_ids']
    if include_sample_weights:
      w = record['weight']
      return (x, y, w)
    return (x, y)

  if is_training:
    dataset = dataset.shuffle(100)
    dataset = dataset.repeat()

  dataset = dataset.map(
      _select_data_from_record,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)
  dataset = dataset.batch(batch_size, drop_remainder=is_training)
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
  return dataset


def create_squad_dataset(file_path,
                         seq_length,
                         batch_size,
                         is_training=True,
                         input_pipeline_context=None):
  """Creates input dataset from (tf)records files for train/eval."""
  name_to_features = {
      'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
      'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),
      'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
  }
  if is_training:
    name_to_features['start_positions'] = tf.io.FixedLenFeature([], tf.int64)
    name_to_features['end_positions'] = tf.io.FixedLenFeature([], tf.int64)
  else:
    name_to_features['unique_ids'] = tf.io.FixedLenFeature([], tf.int64)
  dataset = single_file_dataset(file_path, name_to_features)

  # The dataset is always sharded by number of hosts.
  # num_input_pipelines is the number of hosts rather than number of cores.
  if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1:
    dataset = dataset.shard(input_pipeline_context.num_input_pipelines,
                            input_pipeline_context.input_pipeline_id)

  def _select_data_from_record(record):
    """Dispatches record to features and labels."""
    x, y = {}, {}
    for name, tensor in record.items():
      if name in ('start_positions', 'end_positions'):
        y[name] = tensor
      elif name == 'input_ids':
        x['input_word_ids'] = tensor
      elif name == 'segment_ids':
        x['input_type_ids'] = tensor
      else:
        x[name] = tensor
    return (x, y)

  if is_training:
    dataset = dataset.shuffle(100)
    dataset = dataset.repeat()

  dataset = dataset.map(
      _select_data_from_record,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)
  dataset = dataset.batch(batch_size, drop_remainder=True)
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
  return dataset


def create_retrieval_dataset(file_path,
                             seq_length,
                             batch_size,
                             input_pipeline_context=None):
  """Creates input dataset from (tf)records files for scoring."""
  name_to_features = {
      'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
      'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),
      'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
      'example_id': tf.io.FixedLenFeature([1], tf.int64),
  }
  dataset = single_file_dataset(file_path, name_to_features)

  # The dataset is always sharded by number of hosts.
  # num_input_pipelines is the number of hosts rather than number of cores.
  if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1:
    dataset = dataset.shard(input_pipeline_context.num_input_pipelines,
                            input_pipeline_context.input_pipeline_id)

  def _select_data_from_record(record):
    x = {
        'input_word_ids': record['input_ids'],
        'input_mask': record['input_mask'],
        'input_type_ids': record['segment_ids']
    }
    y = record['example_id']
    return (x, y)

  dataset = dataset.map(
      _select_data_from_record,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)
  dataset = dataset.batch(batch_size, drop_remainder=False)

  def _pad_to_batch(x, y):
    cur_size = tf.shape(y)[0]
    pad_size = batch_size - cur_size
    pad_ids = tf.zeros(shape=[pad_size, seq_length], dtype=tf.int32)
    for key in ('input_word_ids', 'input_mask', 'input_type_ids'):
      x[key] = tf.concat([x[key], pad_ids], axis=0)
    pad_labels = -tf.ones(shape=[pad_size, 1], dtype=tf.int32)
    y = tf.concat([y, pad_labels], axis=0)
    return x, y

  dataset = dataset.map(
      _pad_to_batch, num_parallel_calls=tf.data.experimental.AUTOTUNE)
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
  return dataset
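For reference, a short sketch of driving one of these pipelines; the TFRecord path and the shape parameters are hypothetical:

import tensorflow as tf
from official.nlp.bert import input_pipeline

# "train.tf_record" is a hypothetical file of pre-tokenized classifier
# examples with the feature layout expected above.
train_ds = input_pipeline.create_classifier_dataset(
    file_path="train.tf_record",
    seq_length=128,
    batch_size=32,
    is_training=True)

# Each element is (features, label_ids); the features dict carries
# input_word_ids, input_mask and input_type_ids of shape [32, 128].
features, labels = next(iter(train_ds))
print(features["input_word_ids"].shape, labels.shape)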
File: TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/model_saving_utils.py (new file, mode 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities to save models."""
import os
from absl import logging
import tensorflow as tf
import typing


def export_bert_model(model_export_path: typing.Text,
                      model: tf.keras.Model,
                      checkpoint_dir: typing.Optional[typing.Text] = None,
                      restore_model_using_load_weights: bool = False) -> None:
  """Export BERT model for serving which does not include the optimizer.

  Args:
    model_export_path: Path to which exported model will be saved.
    model: Keras model object to export.
    checkpoint_dir: Path from which model weights will be loaded, if
      specified.
    restore_model_using_load_weights: Whether to use checkpoint.restore() API
      for custom checkpoint or to use model.load_weights() API. There are 2
      different ways to save checkpoints. One is using tf.train.Checkpoint and
      another is using Keras model.save_weights(). Custom training loop
      implementation uses tf.train.Checkpoint API and Keras ModelCheckpoint
      callback internally uses model.save_weights() API. Since these two APIs
      cannot be used together, model loading logic must take into account how
      the model checkpoint was saved.

  Raises:
    ValueError when either model_export_path or model is not specified.
  """
  if not model_export_path:
    raise ValueError('model_export_path must be specified.')
  if not isinstance(model, tf.keras.Model):
    raise ValueError('model must be a tf.keras.Model object.')

  if checkpoint_dir:
    if restore_model_using_load_weights:
      model_weight_path = os.path.join(checkpoint_dir, 'checkpoint')
      assert tf.io.gfile.exists(model_weight_path)
      model.load_weights(model_weight_path)
    else:
      checkpoint = tf.train.Checkpoint(model=model)

      # Restores the model from latest checkpoint.
      latest_checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
      assert latest_checkpoint_file
      logging.info('Checkpoint file %s found and restoring from '
                   'checkpoint', latest_checkpoint_file)
      checkpoint.restore(
          latest_checkpoint_file).assert_existing_objects_matched()

  model.save(model_export_path, include_optimizer=False, save_format='tf')
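A small sketch of the call, assuming a hypothetical export path and a tiny stand-in Keras model (a real caller would pass a BERT classifier and usually a checkpoint_dir):

import tensorflow as tf
from official.nlp.bert import model_saving_utils

# Stand-in model; only illustrates the call signature, not a BERT model.
inputs = tf.keras.layers.Input(shape=(4,), dtype=tf.float32)
model = tf.keras.Model(inputs, tf.keras.layers.Dense(2)(inputs))

# Without checkpoint_dir the current in-memory weights are exported; with
# checkpoint_dir, weights are restored first as described in the docstring.
# "/tmp/bert_serving" is a hypothetical destination.
model_saving_utils.export_bert_model("/tmp/bert_serving", model=model)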
File: TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/model_training_utils.py (new file, mode 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A light weight utilities to train NLP models."""
import json
import os
import tempfile

from absl import logging
import tensorflow as tf
from tensorflow.python.util import deprecation
from official.common import distribute_utils
from official.modeling import grad_utils

_SUMMARY_TXT = 'training_summary.txt'
_MIN_SUMMARY_STEPS = 10


def _should_export_checkpoint(strategy):
  return (not strategy) or strategy.extended.should_checkpoint


def _should_export_summary(strategy):
  return (not strategy) or strategy.extended.should_save_summary


def _save_checkpoint(strategy, checkpoint, model_dir, checkpoint_prefix):
  """Saves the model to model_dir with the provided checkpoint prefix."""
  if _should_export_checkpoint(strategy):
    checkpoint_path = os.path.join(model_dir, checkpoint_prefix)
    saved_path = checkpoint.save(checkpoint_path)
    logging.info('Saving model as TF checkpoint: %s', saved_path)
  else:
    # In multi worker training we need every worker to save checkpoint,
    # because variables can trigger synchronization on read and
    # synchronization needs all workers to participate. To avoid workers
    # overriding each other we save to a temporary directory on non-chief
    # workers.
    tmp_dir = tempfile.mkdtemp()
    checkpoint.save(os.path.join(tmp_dir, 'ckpt'))
    tf.io.gfile.rmtree(tmp_dir)
  return


def _get_input_iterator(input_fn, strategy):
  """Returns distributed dataset iterator."""
  # When training with TPU pods, datasets needs to be cloned across
  # workers. Since Dataset instance cannot be cloned in eager mode, we instead
  # pass callable that returns a dataset.
  if not callable(input_fn):
    raise ValueError('`input_fn` should be a closure that returns a dataset.')
  iterator = iter(strategy.distribute_datasets_from_function(input_fn))
  return iterator


def _float_metric_value(metric):
  """Gets the value of a float-value keras metric."""
  return metric.result().numpy().astype(float)


def clip_by_global_norm_callback(grads_and_vars):
  """Performs gradient clipping."""
  grads, variables = zip(*grads_and_vars)
  (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
  return zip(clipped_grads, variables)


def steps_to_run(current_step, steps_per_epoch, steps_per_loop):
  """Calculates steps to run on device."""
  if steps_per_loop <= 0:
    raise ValueError('steps_per_loop should be positive integer.')
  if steps_per_loop == 1:
    return steps_per_loop
  remainder_in_epoch = current_step % steps_per_epoch
  if remainder_in_epoch != 0:
    return min(steps_per_epoch - remainder_in_epoch, steps_per_loop)
  else:
    return steps_per_loop


def write_txt_summary(training_summary, summary_dir):
  """Writes a summary text file to record stats."""
  if not tf.io.gfile.exists(summary_dir):
    tf.io.gfile.mkdir(summary_dir)
  summary_path = os.path.join(summary_dir, _SUMMARY_TXT)
  with tf.io.gfile.GFile(summary_path, 'wb') as f:
    logging.info('Training Summary: \n%s', str(training_summary))
    f.write(json.dumps(training_summary, indent=4))


@deprecation.deprecated(
    None, 'This function is deprecated and we do not expect adding new '
    'functionalities. Please do not have your code depending '
    'on this library.')
def run_customized_training_loop(
    # pylint: disable=invalid-name
    _sentinel=None,
    # pylint: enable=invalid-name
    strategy=None,
    model_fn=None,
    loss_fn=None,
    scale_loss=True,
    model_dir=None,
    train_input_fn=None,
    steps_per_epoch=None,
    num_eval_per_epoch=1,
    steps_per_loop=None,
    epochs=1,
    eval_input_fn=None,
    eval_steps=None,
    metric_fn=None,
    init_checkpoint=None,
    custom_callbacks=None,
    run_eagerly=False,
    sub_model_export_name=None,
    explicit_allreduce=False,
    pre_allreduce_callbacks=None,
    post_allreduce_callbacks=None,
    train_summary_interval=0,
    allreduce_bytes_per_pack=0):
  """Run BERT pretrain model training using low-level API.

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    strategy: Distribution strategy on which to run low level training loop.
    model_fn: Function that returns a tuple (model, sub_model). Caller of this
      function should add optimizer to the `model` via calling
      `model.compile()` API or manually setting `model.optimizer` attribute.
      Second element of the returned tuple (sub_model) is an optional sub
      model to be used for initial checkpoint -- if provided.
    loss_fn: Function with signature func(labels, logits) and returns a loss
      tensor.
    scale_loss: Whether to divide the raw loss by number of replicas before
      gradients calculation.
    model_dir: Model directory used during training for restoring/saving
      model weights.
    train_input_fn: Function that returns a tf.data.Dataset used for training.
    steps_per_epoch: Number of steps to run per epoch. At the end of each
      epoch, model checkpoint will be saved and evaluation will be conducted
      if evaluation dataset is provided.
    num_eval_per_epoch: Number of evaluations per epoch.
    steps_per_loop: Number of steps per graph-mode loop. In order to reduce
      communication in eager context, training logs are printed every
      steps_per_loop.
    epochs: Number of epochs to train.
    eval_input_fn: Function that returns evaluation dataset. If none,
      evaluation is skipped.
    eval_steps: Number of steps to run evaluation. Required if `eval_input_fn`
      is not none.
    metric_fn: A metrics function that returns either a Keras Metric object or
      a list of Keras Metric objects to record evaluation result using
      evaluation dataset or with training dataset after every epoch.
    init_checkpoint: Optional checkpoint to load to `sub_model` returned by
      `model_fn`.
    custom_callbacks: A list of Keras Callbacks objects to run during
      training. More specifically, `on_train_begin(), on_train_end(),
      on_batch_begin()`, `on_batch_end()`, `on_epoch_begin()`,
      `on_epoch_end()` methods are invoked during training. Note that some
      metrics may be missing from `logs`.
    run_eagerly: Whether to run model training in pure eager execution. This
      should be disabled for TPUStrategy.
    sub_model_export_name: If not None, will export `sub_model` returned by
      `model_fn` into checkpoint files. The name of intermediate checkpoint
      file is {sub_model_export_name}_step_{step}.ckpt and the last
      checkpoint's name is {sub_model_export_name}.ckpt; if None, `sub_model`
      will not be exported as checkpoint.
    explicit_allreduce: Whether to explicitly perform gradient allreduce,
      instead of relying on implicit allreduce in optimizer.apply_gradients().
      Default is False. For now, if training using FP16 mixed precision,
      explicit allreduce will aggregate gradients in FP16 format. For TPU and
      GPU training using FP32, explicit allreduce will aggregate gradients in
      FP32 format.
    pre_allreduce_callbacks: A list of callback functions that takes gradients
      and model variables pairs as input, manipulates them, and returns new
      gradients and model variables pairs. The callback functions will be
      invoked in the list order and before gradients are allreduced. With
      mixed precision training, the pre_allreduce_callbacks will be applied on
      scaled_gradients. Default is no callbacks. Only used when
      explicit_allreduce=True.
    post_allreduce_callbacks: A list of callback functions that takes
      gradients and model variables pairs as input, manipulates them, and
      returns new gradients and model variables pairs. The callback functions
      will be invoked in the list order and right before gradients are applied
      to variables for updates. Default is no callbacks. Only used when
      explicit_allreduce=True.
    train_summary_interval: Step interval for training summaries. If the value
      is a negative number, then training summaries are not enabled.
    allreduce_bytes_per_pack: A non-negative integer. Breaks collective
      operations into packs of certain size. If it's zero, all gradients are
      in one pack. Breaking gradient into packs could enable overlap between
      allreduce and backprop computation. This flag only takes effect when
      explicit_allreduce is set to True.

  Returns:
    Trained model.

  Raises:
    ValueError: (1) When model returned by `model_fn` does not have optimizer
      attribute or when required parameters are set to none. (2) eval args are
      not specified correctly. (3) metric_fn must be a callable if specified.
      (4) sub_model_checkpoint_name is specified, but `sub_model` returned
      by `model_fn` is None.
  """

  if _sentinel is not None:
    raise ValueError('only call `run_customized_training_loop()` '
                     'with named arguments.')

  required_arguments = [
      strategy, model_fn, loss_fn, model_dir, steps_per_epoch, train_input_fn
  ]

  steps_between_evals = int(steps_per_epoch / num_eval_per_epoch)
  if [arg for arg in required_arguments if arg is None]:
    raise ValueError('`strategy`, `model_fn`, `loss_fn`, `model_dir`, '
                     '`steps_per_epoch` and `train_input_fn` are required '
                     'parameters.')
  if not steps_per_loop:
    if tf.config.list_logical_devices('TPU'):
      # One can't fully utilize a TPU with steps_per_loop=1, so in this case
      # default users to a more useful value.
      steps_per_loop = min(1000, steps_between_evals)
    else:
      steps_per_loop = 1
    logging.info('steps_per_loop not specified. Using steps_per_loop=%d',
                 steps_per_loop)
  if steps_per_loop > steps_between_evals:
    logging.warning(
        'steps_per_loop: %d is specified to be greater than '
        ' steps_between_evals: %d, we will use steps_between_evals as'
        ' steps_per_loop.', steps_per_loop, steps_between_evals)
    steps_per_loop = steps_between_evals
  assert tf.executing_eagerly()

  if run_eagerly:
    if isinstance(
        strategy,
        (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)):
      raise ValueError(
          'TPUStrategy should not run eagerly as it heavily relies on graph'
          ' optimization for the distributed system.')

  if eval_input_fn and eval_steps is None:
    raise ValueError('`eval_step` is required when `eval_input_fn ` is not '
                     'none.')
  if metric_fn and not callable(metric_fn):
    raise ValueError(
        'if `metric_fn` is specified, metric_fn must be a callable.')

  total_training_steps = steps_per_epoch * epochs
  train_iterator = _get_input_iterator(train_input_fn, strategy)
  eval_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)

  with distribute_utils.get_strategy_scope(strategy):
    # To correctly place the model weights on accelerators,
    # model and optimizer should be created in scope.
    model, sub_model = model_fn()
    if not hasattr(model, 'optimizer'):
      raise ValueError('User should set optimizer attribute to model '
                       'inside `model_fn`.')
    if sub_model_export_name and sub_model is None:
      raise ValueError('sub_model_export_name is specified as %s, but '
                       'sub_model is None.' % sub_model_export_name)

    callback_list = tf.keras.callbacks.CallbackList(
        callbacks=custom_callbacks, model=model)

    optimizer = model.optimizer

    if init_checkpoint:
      logging.info(
          'Checkpoint file %s found and restoring from '
          'initial checkpoint for core model.', init_checkpoint)
      checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model)
      checkpoint.read(init_checkpoint).assert_existing_objects_matched()
      logging.info('Loading from checkpoint file completed')

    train_loss_metric = tf.keras.metrics.Mean(
        'training_loss', dtype=tf.float32)
    eval_metrics = metric_fn() if metric_fn else []
    if not isinstance(eval_metrics, list):
      eval_metrics = [eval_metrics]
    # If evaluation is required, make a copy of metric as it will be used by
    # both train and evaluation.
    train_metrics = [
        metric.__class__.from_config(metric.get_config())
        for metric in eval_metrics
    ]

    # Create summary writers
    if _should_export_summary(strategy):
      summary_dir = os.path.join(model_dir, 'summaries')
    else:
      # In multi worker training we need every worker to write summary,
      # because variables can trigger synchronization on read and
      # synchronization needs all workers to participate.
      summary_dir = tempfile.mkdtemp()
    eval_summary_writer = tf.summary.create_file_writer(
        os.path.join(summary_dir, 'eval'))
    last_summary_step = 0
    if steps_per_loop >= _MIN_SUMMARY_STEPS and train_summary_interval >= 0:
      # Only writes summary when the stats are collected sufficiently over
      # enough steps.
      train_summary_writer = tf.summary.create_file_writer(
          os.path.join(summary_dir, 'train'))
    else:
      train_summary_writer = tf.summary.create_noop_writer()

    # Collects training variables.
    training_vars = model.trainable_variables

    def _replicated_step(inputs):
      """Replicated training step."""

      inputs, labels = inputs
      with tf.GradientTape() as tape:
        model_outputs = model(inputs, training=True)
        loss = loss_fn(labels, model_outputs)
        # Raw loss is used for reporting in metrics/logs.
        raw_loss = loss
        if scale_loss:
          # Scales down the loss for gradients to be invariant from replicas.
          loss = loss / strategy.num_replicas_in_sync

      if explicit_allreduce:
        grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
                                                     training_vars,
                                                     pre_allreduce_callbacks,
                                                     post_allreduce_callbacks,
                                                     allreduce_bytes_per_pack)
      else:
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
          with tape:
            scaled_loss = optimizer.get_scaled_loss(loss)
          scaled_grads = tape.gradient(scaled_loss, training_vars)
          grads = optimizer.get_unscaled_gradients(scaled_grads)
        else:
          grads = tape.gradient(loss, training_vars)
        optimizer.apply_gradients(zip(grads, training_vars))
      # For reporting, the metric takes the mean of losses.
      train_loss_metric.update_state(raw_loss)
      for metric in train_metrics:
        metric.update_state(labels, model_outputs)

    @tf.function
    def train_steps(iterator, steps):
      """Performs distributed training steps in a loop.

      Args:
        iterator: the distributed iterator of training datasets.
        steps: a tf.int32 integer tensor specifying the number of steps to
          run inside the host training loop.

      Raises:
        ValueError: Any of the arguments or tensor shapes are invalid.
      """
      if not isinstance(steps, tf.Tensor):
        raise ValueError('steps should be a Tensor. Python objects may cause '
                         'retracing.')

      for _ in tf.range(steps):
        strategy.run(_replicated_step, args=(next(iterator),))

    def train_single_step(iterator):
      """Performs a distributed training step.

      Args:
        iterator: the distributed iterator of training datasets.

      Raises:
        ValueError: Any of the arguments or tensor shapes are invalid.
      """
      strategy.run(_replicated_step, args=(next(iterator),))

    def test_step(iterator):
      """Calculates evaluation metrics on distributed devices."""

      def _test_step_fn(inputs):
        """Replicated accuracy calculation."""

        inputs, labels = inputs
        model_outputs = model(inputs, training=False)
        for metric in eval_metrics:
          metric.update_state(labels, model_outputs)
        return model_outputs, labels

      outputs, labels = strategy.run(_test_step_fn, args=(next(iterator),))
      outputs = tf.nest.map_structure(strategy.experimental_local_results,
                                      outputs)
      labels = tf.nest.map_structure(strategy.experimental_local_results,
                                     labels)
      return outputs, labels

    if not run_eagerly:
      train_single_step = tf.function(train_single_step)
      test_step = tf.function(test_step)

    def _run_evaluation(current_training_step, test_iterator):
      """Runs validation steps and aggregate metrics.

      Args:
        current_training_step: tf.int32 tensor containing the current step.
        test_iterator: distributed iterator of test datasets.

      Returns:
        A dict of metric names and values.
      """
      # The last batch of the evaluation is often smaller than previous ones.
      # Moreover, in some distributed pieces it might even be empty. Therefore,
      # different from the way training_loss is calculated, it is needed to
      # gather all the logits and labels here to calculate the evaluation loss
      # outside.
      loss_list, loss_weights = list(), list()
      for _ in range(eval_steps):
        outputs, labels = test_step(test_iterator)
        for cur_logits, cur_labels in zip(outputs, labels):
          # This is to handle cases when cur_labels is not a single tensor,
          # but a dict of tensors.
          cur_weight = tf.shape(tf.nest.flatten(cur_labels)[0])[0]
          if cur_weight != 0:
            loss_list.append(loss_fn(cur_labels, cur_logits).numpy())
            loss_weights.append(cur_weight)
      # The sample_weights are the actual number of examples in each batch,
      # a summation of numbers of examples in each replica if using
      # distributed training.
      eval_loss_metric.update_state(loss_list, sample_weight=loss_weights)

      logs = {}
      with eval_summary_writer.as_default():
        for metric in [eval_loss_metric] + eval_metrics + model.metrics:
          metric_value = _float_metric_value(metric)
          logs[metric.name] = metric_value
          logging.info('Step: [%d] Validation %s = %f', current_training_step,
                       metric.name, metric_value)
          tf.summary.scalar(
              metric.name, metric_value, step=current_training_step)
        eval_summary_writer.flush()

      return logs

    # Training loop starts here.
    checkpoint = tf.train.Checkpoint(
        model=model, optimizer=optimizer, global_step=optimizer.iterations)
    sub_model_checkpoint = tf.train.Checkpoint(
        model=sub_model,
        global_step=optimizer.iterations) if sub_model_export_name else None

    latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
    if latest_checkpoint_file:
      logging.info('Checkpoint file %s found and restoring from '
                   'checkpoint', latest_checkpoint_file)
      checkpoint.restore(latest_checkpoint_file)
      logging.info('Loading from checkpoint file completed')

    current_step = optimizer.iterations.numpy()
    checkpoint_name = 'ctl_step_{step}.ckpt'

    logs = {}
    callback_list.on_train_begin()
    while current_step < total_training_steps and not model.stop_training:
      if current_step % steps_per_epoch == 0:
        callback_list.on_epoch_begin(
            int(current_step / steps_per_epoch) + 1)

      # Training loss/metric are taking average over steps inside micro
      # training loop. We reset their values before each round.
      train_loss_metric.reset_states()
      for metric in train_metrics + model.metrics:
        metric.reset_states()

      callback_list.on_batch_begin(current_step)
      # Runs several steps in the host while loop.
      steps = steps_to_run(current_step, steps_between_evals, steps_per_loop)

      if tf.config.list_physical_devices('GPU'):
        # TODO(zongweiz): merge with train_steps once tf.while_loop
        # GPU performance bugs are fixed.
        for _ in range(steps):
          train_single_step(train_iterator)
      else:
        # Converts steps to a Tensor to avoid tf.function retracing.
        train_steps(train_iterator,
                    tf.convert_to_tensor(steps, dtype=tf.int32))
      train_loss = _float_metric_value(train_loss_metric)
      current_step += steps

      # Updates training logging.
      training_status = 'Train Step: %d/%d / loss = %s' % (
          current_step, total_training_steps, train_loss)

      if current_step >= last_summary_step + train_summary_interval:
        summary_writer = train_summary_writer
        last_summary_step = current_step
      else:
        summary_writer = tf.summary.create_noop_writer()

      with summary_writer.as_default():
        if callable(optimizer.learning_rate):
          tf.summary.scalar(
              'learning_rate',
              optimizer.learning_rate(current_step),
              step=current_step)
        tf.summary.scalar(
            train_loss_metric.name, train_loss, step=current_step)
        for metric in train_metrics + model.metrics:
          metric_value = _float_metric_value(metric)
          training_status += ' %s = %f' % (metric.name, metric_value)
          tf.summary.scalar(metric.name, metric_value, step=current_step)
        summary_writer.flush()
      logging.info(training_status)

      # If no need for evaluation, we only call on_batch_end with train_loss,
      # this is to ensure we get granular global_step/sec on Tensorboard.
      if current_step % steps_between_evals:
        callback_list.on_batch_end(current_step - 1, {'loss': train_loss})
      else:
        # Save a submodel with the step in the file name after each epoch.
        if sub_model_export_name:
          _save_checkpoint(
              strategy, sub_model_checkpoint, model_dir,
              '%s_step_%d.ckpt' % (sub_model_export_name, current_step))

        # Save model checkpoints and run validation steps after each epoch
        # (with the exception of the final epoch which is handled after the
        # training loop).
        if current_step < total_training_steps:
          _save_checkpoint(strategy, checkpoint, model_dir,
                           checkpoint_name.format(step=current_step))
          if eval_input_fn:
            # Re-initialize evaluation metric.
            eval_loss_metric.reset_states()
            for metric in eval_metrics + model.metrics:
              metric.reset_states()

            logging.info('Running evaluation after step: %s.', current_step)
            logs = _run_evaluation(
                current_step, _get_input_iterator(eval_input_fn, strategy))
        # We add train_loss here rather than call on_batch_end twice to make
        # sure that no duplicated values are generated.
        logs['loss'] = train_loss
        callback_list.on_batch_end(current_step - 1, logs)

      # Calls on_epoch_end after each real epoch ends to prevent
      # mis-calculation of training steps.
      if current_step % steps_per_epoch == 0:
        callback_list.on_epoch_end(int(current_step / steps_per_epoch), logs)

    if sub_model_export_name:
      _save_checkpoint(strategy, sub_model_checkpoint, model_dir,
                       '%s.ckpt' % sub_model_export_name)

    _save_checkpoint(strategy, checkpoint, model_dir,
                     checkpoint_name.format(step=current_step))
    if eval_input_fn:
      # Re-initialize evaluation metric.
      eval_loss_metric.reset_states()
      for metric in eval_metrics + model.metrics:
        metric.reset_states()

      logging.info('Running final evaluation after training is complete.')
      logs = _run_evaluation(current_step,
                             _get_input_iterator(eval_input_fn, strategy))
    callback_list.on_epoch_end(int(current_step / steps_per_epoch), logs)

    training_summary = {
        'total_training_steps': total_training_steps,
        'train_loss': _float_metric_value(train_loss_metric),
    }
    for metric in model.metrics:
      training_summary[metric.name] = _float_metric_value(metric)
    if eval_metrics:
      training_summary['last_train_metrics'] = _float_metric_value(
          train_metrics[0])
      training_summary['eval_metrics'] = _float_metric_value(eval_metrics[0])

    write_txt_summary(training_summary, summary_dir)

    if not _should_export_summary(strategy):
      tf.io.gfile.rmtree(summary_dir)

    callback_list.on_train_end()

    return model
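A sketch of how this loop is typically invoked, reusing the create_model_fn, create_fake_data_input_fn, and metric_fn helpers defined in the test file that follows; model_dir is a hypothetical path:

import tensorflow as tf
from official.nlp.bert import model_training_utils

# All arguments must be named; positional calls are blocked by _sentinel.
model = model_training_utils.run_customized_training_loop(
    strategy=tf.distribute.get_strategy(),  # default (no-op) strategy
    model_fn=create_model_fn(input_shape=[128], num_classes=3),
    loss_fn=tf.keras.losses.categorical_crossentropy,
    model_dir="/tmp/ctl_model_dir",  # hypothetical checkpoint/summary dir
    train_input_fn=create_fake_data_input_fn(
        batch_size=8, features_shape=[128], num_classes=3),
    steps_per_epoch=20,
    steps_per_loop=10,
    epochs=2,
    metric_fn=metric_fn)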
File: TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/model_training_utils_test.py (new file, mode 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for official.modeling.training.model_training_utils."""
import
os
from
absl
import
logging
from
absl.testing
import
flagsaver
from
absl.testing
import
parameterized
from
absl.testing.absltest
import
mock
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
tensorflow.python.distribute
import
strategy_combinations
from
official.nlp.bert
import
common_flags
from
official.nlp.bert
import
model_training_utils
common_flags
.
define_common_bert_flags
()
def eager_strategy_combinations():
  return combinations.combine(
      distribution=[
          strategy_combinations.default_strategy,
          strategy_combinations.cloud_tpu_strategy,
          strategy_combinations.one_device_strategy_gpu,
          strategy_combinations.mirrored_strategy_with_gpu_and_cpu,
          strategy_combinations.mirrored_strategy_with_two_gpus,
      ],)


def eager_gpu_strategy_combinations():
  return combinations.combine(
      distribution=[
          strategy_combinations.default_strategy,
          strategy_combinations.one_device_strategy_gpu,
          strategy_combinations.mirrored_strategy_with_gpu_and_cpu,
          strategy_combinations.mirrored_strategy_with_two_gpus,
      ],)
def create_fake_data_input_fn(batch_size, features_shape, num_classes):
  """Creates a dummy input function with the given feature and label shapes.

  Args:
    batch_size: integer.
    features_shape: list[int]. Feature shape for an individual example.
    num_classes: integer. Number of labels.

  Returns:
    An input function that is usable in the executor.
  """

  def _dataset_fn(input_context=None):
    """An input function for generating fake data."""
    local_batch_size = input_context.get_per_replica_batch_size(batch_size)
    features = np.random.rand(64, *features_shape)
    labels = np.random.randint(2, size=[64, num_classes])
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)

    def _assign_dtype(features, labels):
      features = tf.cast(features, tf.float32)
      labels = tf.cast(labels, tf.float32)
      return features, labels

    # Shuffle, repeat, and batch the examples.
    dataset = dataset.map(_assign_dtype)
    dataset = dataset.shuffle(64).repeat()
    dataset = dataset.batch(local_batch_size, drop_remainder=True)
    dataset = dataset.prefetch(buffer_size=64)
    return dataset

  return _dataset_fn
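# Illustrative sketch (editor's addition, not exercised above): _dataset_fn
# requires a tf.distribute.InputContext, so it is meant to be handed to
# strategy.distribute_datasets_from_function rather than called directly.
# The choice of MirroredStrategy below is arbitrary.
#
# import tensorflow as tf
#
# input_fn = create_fake_data_input_fn(
#     batch_size=8, features_shape=[128], num_classes=3)
# strategy = tf.distribute.MirroredStrategy()
# dist_dataset = strategy.distribute_datasets_from_function(input_fn)
# features, labels = next(iter(dist_dataset))  # per-replica values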
def create_model_fn(input_shape, num_classes, use_float16=False):

  def _model_fn():
    """A one-layer softmax model suitable for testing."""
    input_layer = tf.keras.layers.Input(shape=input_shape)
    x = tf.keras.layers.Dense(num_classes, activation='relu')(input_layer)
    output_layer = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
    sub_model = tf.keras.models.Model(input_layer, x, name='sub_model')
    model = tf.keras.models.Model(input_layer, output_layer, name='model')
    model.add_metric(
        tf.reduce_mean(input_layer), name='mean_input', aggregation='mean')
    model.optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
    if use_float16:
      model.optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
          model.optimizer)
    return model, sub_model

  return _model_fn
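# Illustrative sketch (editor's addition): `model` and `sub_model` are two
# Keras models built on the same layer objects, which is what lets the
# training loop checkpoint the sub model's weights separately.
#
# model, sub_model = create_model_fn(input_shape=[128], num_classes=3)()
# assert model.layers[1] is sub_model.layers[-1]  # shared Dense layer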
def metric_fn():
  """Gets a tf.keras metric object."""
  return tf.keras.metrics.CategoricalAccuracy(name='accuracy', dtype=tf.float32)


def summaries_with_matching_keyword(keyword, summary_dir):
  """Yields summary protos matching given keyword from event file."""
  event_paths = tf.io.gfile.glob(os.path.join(summary_dir, 'events*'))
  for event in tf.compat.v1.train.summary_iterator(event_paths[-1]):
    if event.summary is not None:
      for value in event.summary.value:
        if keyword in value.tag:
          logging.error(event)
          yield event.summary


def check_eventfile_for_keyword(keyword, summary_dir):
  """Checks event files for the keyword."""
  return any(summaries_with_matching_keyword(keyword, summary_dir))
class RecordingCallback(tf.keras.callbacks.Callback):

  def __init__(self):
    self.batch_begin = []  # (batch, logs)
    self.batch_end = []  # (batch, logs)
    self.epoch_begin = []  # (epoch, logs)
    self.epoch_end = []  # (epoch, logs)

  def on_batch_begin(self, batch, logs=None):
    self.batch_begin.append((batch, logs))

  def on_batch_end(self, batch, logs=None):
    self.batch_end.append((batch, logs))

  def on_epoch_begin(self, epoch, logs=None):
    self.epoch_begin.append((epoch, logs))

  def on_epoch_end(self, epoch, logs=None):
    self.epoch_end.append((epoch, logs))
class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):

  def setUp(self):
    super(ModelTrainingUtilsTest, self).setUp()
    self._model_fn = create_model_fn(input_shape=[128], num_classes=3)

  @flagsaver.flagsaver
  def run_training(self, strategy, model_dir, steps_per_loop, run_eagerly):
    input_fn = create_fake_data_input_fn(
        batch_size=8, features_shape=[128], num_classes=3)
    model_training_utils.run_customized_training_loop(
        strategy=strategy,
        model_fn=self._model_fn,
        loss_fn=tf.keras.losses.categorical_crossentropy,
        model_dir=model_dir,
        steps_per_epoch=20,
        steps_per_loop=steps_per_loop,
        epochs=2,
        train_input_fn=input_fn,
        eval_input_fn=input_fn,
        eval_steps=10,
        init_checkpoint=None,
        sub_model_export_name='my_submodel_name',
        metric_fn=metric_fn,
        custom_callbacks=None,
        run_eagerly=run_eagerly)

  @combinations.generate(eager_strategy_combinations())
  def test_train_eager_single_step(self, distribution):
    model_dir = self.create_tempdir().full_path
    if isinstance(
        distribution,
        (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)):
      with self.assertRaises(ValueError):
        self.run_training(
            distribution, model_dir, steps_per_loop=1, run_eagerly=True)
    else:
      self.run_training(
          distribution, model_dir, steps_per_loop=1, run_eagerly=True)

  @combinations.generate(eager_gpu_strategy_combinations())
  def test_train_eager_mixed_precision(self, distribution):
    model_dir = self.create_tempdir().full_path
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    self._model_fn = create_model_fn(
        input_shape=[128], num_classes=3, use_float16=True)
    self.run_training(
        distribution, model_dir, steps_per_loop=1, run_eagerly=True)

  @combinations.generate(eager_strategy_combinations())
  def test_train_check_artifacts(self, distribution):
    model_dir = self.create_tempdir().full_path
    self.run_training(
        distribution, model_dir, steps_per_loop=10, run_eagerly=False)

    # Two checkpoints should be saved after two epochs.
    files = map(os.path.basename,
                tf.io.gfile.glob(os.path.join(model_dir, 'ctl_step_*index')))
    self.assertCountEqual(
        ['ctl_step_20.ckpt-1.index', 'ctl_step_40.ckpt-2.index'], files)

    # Three submodel checkpoints should be saved after two epochs (one after
    # each epoch plus one final).
    files = map(
        os.path.basename,
        tf.io.gfile.glob(os.path.join(model_dir, 'my_submodel_name*index')))
    self.assertCountEqual([
        'my_submodel_name.ckpt-3.index',
        'my_submodel_name_step_20.ckpt-1.index',
        'my_submodel_name_step_40.ckpt-2.index',
    ], files)

    self.assertNotEmpty(
        tf.io.gfile.glob(
            os.path.join(model_dir, 'summaries/training_summary*')))

    # Loss and accuracy values should be written into summaries.
    self.assertTrue(
        check_eventfile_for_keyword('loss',
                                    os.path.join(model_dir,
                                                 'summaries/train')))
    self.assertTrue(
        check_eventfile_for_keyword('accuracy',
                                    os.path.join(model_dir,
                                                 'summaries/train')))
    self.assertTrue(
        check_eventfile_for_keyword('mean_input',
                                    os.path.join(model_dir,
                                                 'summaries/train')))
    self.assertTrue(
        check_eventfile_for_keyword('accuracy',
                                    os.path.join(model_dir,
                                                 'summaries/eval')))
    self.assertTrue(
        check_eventfile_for_keyword('mean_input',
                                    os.path.join(model_dir,
                                                 'summaries/eval')))

  @combinations.generate(eager_strategy_combinations())
  def test_train_check_callbacks(self, distribution):
    model_dir = self.create_tempdir().full_path
    callback = RecordingCallback()
    callbacks = [callback]
    input_fn = create_fake_data_input_fn(
        batch_size=8, features_shape=[128], num_classes=3)
    model_training_utils.run_customized_training_loop(
        strategy=distribution,
        model_fn=self._model_fn,
        loss_fn=tf.keras.losses.categorical_crossentropy,
        model_dir=model_dir,
        steps_per_epoch=20,
        num_eval_per_epoch=4,
        steps_per_loop=10,
        epochs=2,
        train_input_fn=input_fn,
        eval_input_fn=input_fn,
        eval_steps=10,
        init_checkpoint=None,
        metric_fn=metric_fn,
        custom_callbacks=callbacks,
        run_eagerly=False)
    self.assertEqual(callback.epoch_begin, [(1, {}), (2, {})])

    epoch_ends, epoch_end_infos = zip(*callback.epoch_end)
    self.assertEqual(list(epoch_ends), [1, 2, 2])
    for info in epoch_end_infos:
      self.assertIn('accuracy', info)

    self.assertEqual(callback.batch_begin, [(0, {}), (5, {}), (10, {}),
                                            (15, {}), (20, {}), (25, {}),
                                            (30, {}), (35, {})])

    batch_ends, batch_end_infos = zip(*callback.batch_end)
    self.assertEqual(list(batch_ends), [4, 9, 14, 19, 24, 29, 34, 39])
    for info in batch_end_infos:
      self.assertIn('loss', info)

  @combinations.generate(
      combinations.combine(
          distribution=[
              strategy_combinations.one_device_strategy_gpu,
          ],))
  def test_train_check_artifacts_non_chief(self, distribution):
    # We shouldn't export artifacts on non-chief workers. Since there's no easy
    # way to test with real MultiWorkerMirroredStrategy, we patch the strategy
    # to make it as if it's MultiWorkerMirroredStrategy on non-chief workers.
    extended = distribution.extended
    with mock.patch.object(extended.__class__, 'should_checkpoint',
                           new_callable=mock.PropertyMock,
                           return_value=False), \
         mock.patch.object(extended.__class__, 'should_save_summary',
                           new_callable=mock.PropertyMock,
                           return_value=False):
      model_dir = self.create_tempdir().full_path
      self.run_training(
          distribution, model_dir, steps_per_loop=10, run_eagerly=False)
      self.assertEmpty(tf.io.gfile.listdir(model_dir))


if __name__ == '__main__':
  tf.test.main()
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/run_classifier.py
0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BERT classification or regression finetuning runner in TF 2.x."""
import functools
import json
import math
import os

# Import libraries
from absl import app
from absl import flags
from absl import logging
import gin
import tensorflow as tf
from official.common import distribute_utils
from official.modeling import performance
from official.nlp import optimization
from official.nlp.bert import bert_models
from official.nlp.bert import common_flags
from official.nlp.bert import configs as bert_configs
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.utils.misc import keras_utils

flags.DEFINE_enum(
    'mode', 'train_and_eval', ['train_and_eval', 'export_only', 'predict'],
    'One of {"train_and_eval", "export_only", "predict"}. `train_and_eval`: '
    'trains the model and evaluates in the meantime. '
    '`export_only`: will take the latest checkpoint inside '
    'model_dir and export a `SavedModel`. `predict`: takes a checkpoint and '
    'restores the model to output predictions on the test set.')
flags.DEFINE_string('train_data_path', None,
                    'Path to training data for BERT classifier.')
flags.DEFINE_string('eval_data_path', None,
                    'Path to evaluation data for BERT classifier.')
flags.DEFINE_string(
    'input_meta_data_path', None,
    'Path to file that contains meta data about input '
    'to be used for training and evaluation.')
flags.DEFINE_integer('train_data_size', None, 'Number of training samples '
                     'to use. If None, uses the full train data. '
                     '(default: None).')
flags.DEFINE_string('predict_checkpoint_path', None,
                    'Path to the checkpoint for predictions.')
flags.DEFINE_integer(
    'num_eval_per_epoch', 1,
    'Number of evaluations per epoch. The purpose of this flag is to provide '
    'more granular evaluation scores and checkpoints. For example, if original '
    'data has N samples and num_eval_per_epoch is n, then each epoch will be '
    'evaluated every N/n samples.')
flags.DEFINE_integer('train_batch_size', 32, 'Batch size for training.')
flags.DEFINE_integer('eval_batch_size', 32, 'Batch size for evaluation.')

common_flags.define_common_bert_flags()

FLAGS = flags.FLAGS

LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32}
def get_loss_fn(num_classes):
  """Gets the classification loss function."""

  def classification_loss_fn(labels, logits):
    """Classification loss."""
    labels = tf.squeeze(labels)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    one_hot_labels = tf.one_hot(
        tf.cast(labels, dtype=tf.int32), depth=num_classes, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(
        tf.cast(one_hot_labels, dtype=tf.float32) * log_probs, axis=-1)
    return tf.reduce_mean(per_example_loss)

  return classification_loss_fn
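# Illustrative sketch (editor's addition): the hand-rolled loss above is
# equivalent to Keras sparse categorical crossentropy computed from logits
# and averaged over the batch, as this toy comparison shows.
#
# import tensorflow as tf
#
# logits = tf.constant([[2.0, 0.5, -1.0], [0.1, 0.2, 0.3]])
# labels = tf.constant([[0], [2]], dtype=tf.int32)
# custom = get_loss_fn(num_classes=3)(labels, logits)
# reference = tf.reduce_mean(
#     tf.keras.losses.sparse_categorical_crossentropy(
#         tf.squeeze(labels), logits, from_logits=True))
# # custom and reference agree up to float tolerance.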
def get_dataset_fn(input_file_pattern,
                   max_seq_length,
                   global_batch_size,
                   is_training,
                   label_type=tf.int64,
                   include_sample_weights=False,
                   num_samples=None):
  """Gets a closure to create a dataset."""

  def _dataset_fn(ctx=None):
    """Returns tf.data.Dataset for distributed BERT classifier training."""
    batch_size = ctx.get_per_replica_batch_size(
        global_batch_size) if ctx else global_batch_size
    dataset = input_pipeline.create_classifier_dataset(
        tf.io.gfile.glob(input_file_pattern),
        max_seq_length,
        batch_size,
        is_training=is_training,
        input_pipeline_context=ctx,
        label_type=label_type,
        include_sample_weights=include_sample_weights,
        num_samples=num_samples)
    return dataset

  return _dataset_fn
def run_bert_classifier(strategy,
                        bert_config,
                        input_meta_data,
                        model_dir,
                        epochs,
                        steps_per_epoch,
                        steps_per_loop,
                        eval_steps,
                        warmup_steps,
                        initial_lr,
                        init_checkpoint,
                        train_input_fn,
                        eval_input_fn,
                        training_callbacks=True,
                        custom_callbacks=None,
                        custom_metrics=None):
  """Run BERT classifier training using low-level API."""
  max_seq_length = input_meta_data['max_seq_length']
  num_classes = input_meta_data.get('num_labels', 1)
  is_regression = num_classes == 1

  def _get_classifier_model():
    """Gets a classifier model."""
    classifier_model, core_model = (
        bert_models.classifier_model(
            bert_config,
            num_classes,
            max_seq_length,
            hub_module_url=FLAGS.hub_module_url,
            hub_module_trainable=FLAGS.hub_module_trainable))
    optimizer = optimization.create_optimizer(initial_lr,
                                              steps_per_epoch * epochs,
                                              warmup_steps, FLAGS.end_lr,
                                              FLAGS.optimizer_type)
    classifier_model.optimizer = performance.configure_optimizer(
        optimizer,
        use_float16=common_flags.use_float16(),
        use_graph_rewrite=common_flags.use_graph_rewrite())
    return classifier_model, core_model

  # tf.keras.losses objects accept optional sample_weight arguments (e.g.
  # coming from the dataset) to compute weighted loss, as used for the
  # regression tasks. The classification tasks, which use the custom
  # get_loss_fn, do not accept sample weights.
  loss_fn = (tf.keras.losses.MeanSquaredError() if is_regression
             else get_loss_fn(num_classes))

  # Defines evaluation metrics function, which will create metrics in the
  # correct device and strategy scope.
  if custom_metrics:
    metric_fn = custom_metrics
  elif is_regression:
    metric_fn = functools.partial(
        tf.keras.metrics.MeanSquaredError,
        'mean_squared_error',
        dtype=tf.float32)
  else:
    metric_fn = functools.partial(
        tf.keras.metrics.SparseCategoricalAccuracy,
        'accuracy',
        dtype=tf.float32)

  # Start training using Keras compile/fit API.
  logging.info('Training using TF 2.x Keras compile/fit API with '
               'distribution strategy.')
  return run_keras_compile_fit(
      model_dir,
      strategy,
      _get_classifier_model,
      train_input_fn,
      eval_input_fn,
      loss_fn,
      metric_fn,
      init_checkpoint,
      epochs,
      steps_per_epoch,
      steps_per_loop,
      eval_steps,
      training_callbacks=training_callbacks,
      custom_callbacks=custom_callbacks)
def run_keras_compile_fit(model_dir,
                          strategy,
                          model_fn,
                          train_input_fn,
                          eval_input_fn,
                          loss_fn,
                          metric_fn,
                          init_checkpoint,
                          epochs,
                          steps_per_epoch,
                          steps_per_loop,
                          eval_steps,
                          training_callbacks=True,
                          custom_callbacks=None):
  """Runs BERT classifier model using Keras compile/fit API."""

  with strategy.scope():
    training_dataset = train_input_fn()
    evaluation_dataset = eval_input_fn() if eval_input_fn else None
    bert_model, sub_model = model_fn()
    optimizer = bert_model.optimizer

    if init_checkpoint:
      checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model)
      checkpoint.read(init_checkpoint).assert_existing_objects_matched()

    if not isinstance(metric_fn, (list, tuple)):
      metric_fn = [metric_fn]
    bert_model.compile(
        optimizer=optimizer,
        loss=loss_fn,
        metrics=[fn() for fn in metric_fn],
        steps_per_execution=steps_per_loop)

    summary_dir = os.path.join(model_dir, 'summaries')
    summary_callback = tf.keras.callbacks.TensorBoard(summary_dir)
    checkpoint = tf.train.Checkpoint(model=bert_model, optimizer=optimizer)
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint,
        directory=model_dir,
        max_to_keep=None,
        step_counter=optimizer.iterations,
        checkpoint_interval=0)
    checkpoint_callback = keras_utils.SimpleCheckpoint(checkpoint_manager)

    if training_callbacks:
      if custom_callbacks is not None:
        custom_callbacks += [summary_callback, checkpoint_callback]
      else:
        custom_callbacks = [summary_callback, checkpoint_callback]

    history = bert_model.fit(
        x=training_dataset,
        validation_data=evaluation_dataset,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_steps=eval_steps,
        callbacks=custom_callbacks)
    stats = {'total_training_steps': steps_per_epoch * epochs}
    if 'loss' in history.history:
      stats['train_loss'] = history.history['loss'][-1]
    if 'val_accuracy' in history.history:
      stats['eval_metrics'] = history.history['val_accuracy'][-1]
    return bert_model, stats
def get_predictions_and_labels(strategy,
                               trained_model,
                               eval_input_fn,
                               is_regression=False,
                               return_probs=False):
  """Obtains predictions of trained model on evaluation data.

  Note that list of labels is returned along with the predictions because the
  order changes on distributing dataset over TPU pods.

  Args:
    strategy: Distribution strategy.
    trained_model: Trained model with preloaded weights.
    eval_input_fn: Input function for evaluation data.
    is_regression: Whether it is a regression task.
    return_probs: Whether to return probabilities of classes.

  Returns:
    predictions: List of predictions.
    labels: List of gold labels corresponding to predictions.
  """

  @tf.function
  def test_step(iterator):
    """Computes predictions on distributed devices."""

    def _test_step_fn(inputs):
      """Replicated predictions."""
      inputs, labels = inputs
      logits = trained_model(inputs, training=False)
      if not is_regression:
        probabilities = tf.nn.softmax(logits)
        return probabilities, labels
      else:
        return logits, labels

    outputs, labels = strategy.run(_test_step_fn, args=(next(iterator),))
    # outputs: current batch logits as a tuple of shard logits
    outputs = tf.nest.map_structure(strategy.experimental_local_results,
                                    outputs)
    labels = tf.nest.map_structure(strategy.experimental_local_results, labels)
    return outputs, labels

  def _run_evaluation(test_iterator):
    """Runs evaluation steps."""
    preds, golds = list(), list()
    try:
      with tf.experimental.async_scope():
        while True:
          probabilities, labels = test_step(test_iterator)
          for cur_probs, cur_labels in zip(probabilities, labels):
            if return_probs:
              preds.extend(cur_probs.numpy().tolist())
            else:
              preds.extend(tf.math.argmax(cur_probs, axis=1).numpy())
            golds.extend(cur_labels.numpy().tolist())
    except (StopIteration, tf.errors.OutOfRangeError):
      tf.experimental.async_clear_error()
    return preds, golds

  test_iter = iter(strategy.distribute_datasets_from_function(eval_input_fn))
  predictions, labels = _run_evaluation(test_iter)

  return predictions, labels
def export_classifier(model_export_path, input_meta_data, bert_config,
                      model_dir):
  """Exports a trained model as a `SavedModel` for inference.

  Args:
    model_export_path: a string specifying the path to the SavedModel
      directory.
    input_meta_data: dictionary containing meta data about input and model.
    bert_config: Bert configuration file to define core bert layers.
    model_dir: The directory where the model weights and training/evaluation
      summaries are stored.

  Raises:
    Export path is not specified, got an empty string or None.
  """
  if not model_export_path:
    raise ValueError('Export path is not specified: %s' % model_export_path)
  if not model_dir:
    raise ValueError('Model directory is not specified: %s' % model_dir)

  # Export uses float32 for now, even if training uses mixed precision.
  tf.keras.mixed_precision.set_global_policy('float32')
  classifier_model = bert_models.classifier_model(
      bert_config,
      input_meta_data.get('num_labels', 1),
      hub_module_url=FLAGS.hub_module_url,
      hub_module_trainable=False)[0]

  model_saving_utils.export_bert_model(
      model_export_path, model=classifier_model, checkpoint_dir=model_dir)
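# Illustrative sketch (editor's addition): once exported, the SavedModel can
# be loaded back without any of the code above. The export path is an
# example, and the available signatures depend on how export_bert_model
# saved the model.
#
# import tensorflow as tf
#
# loaded = tf.saved_model.load('/tmp/bert_classifier_export')
# print(list(loaded.signatures.keys()))  # inspect before wiring up a client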
def run_bert(strategy,
             input_meta_data,
             model_config,
             train_input_fn=None,
             eval_input_fn=None,
             init_checkpoint=None,
             custom_callbacks=None,
             custom_metrics=None):
  """Run BERT training."""
  # Enables XLA in Session Config. Should not be set for TPU.
  keras_utils.set_session_config(FLAGS.enable_xla)
  performance.set_mixed_precision_policy(common_flags.dtype())

  epochs = FLAGS.num_train_epochs * FLAGS.num_eval_per_epoch
  train_data_size = (
      input_meta_data['train_data_size'] // FLAGS.num_eval_per_epoch)
  if FLAGS.train_data_size:
    train_data_size = min(train_data_size, FLAGS.train_data_size)
    logging.info('Updated train_data_size: %s', train_data_size)
  steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
  warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
  eval_steps = int(
      math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

  if not strategy:
    raise ValueError('Distribution strategy has not been specified.')

  if not custom_callbacks:
    custom_callbacks = []

  if FLAGS.log_steps:
    custom_callbacks.append(
        keras_utils.TimeHistory(
            batch_size=FLAGS.train_batch_size,
            log_steps=FLAGS.log_steps,
            logdir=FLAGS.model_dir))

  trained_model, _ = run_bert_classifier(
      strategy,
      model_config,
      input_meta_data,
      FLAGS.model_dir,
      epochs,
      steps_per_epoch,
      FLAGS.steps_per_loop,
      eval_steps,
      warmup_steps,
      FLAGS.learning_rate,
      init_checkpoint or FLAGS.init_checkpoint,
      train_input_fn,
      eval_input_fn,
      custom_callbacks=custom_callbacks,
      custom_metrics=custom_metrics)

  if FLAGS.model_export_path:
    model_saving_utils.export_bert_model(
        FLAGS.model_export_path, model=trained_model)
  return trained_model
def custom_main(custom_callbacks=None, custom_metrics=None):
  """Run classification or regression.

  Args:
    custom_callbacks: list of tf.keras.Callbacks passed to training loop.
    custom_metrics: list of metrics passed to the training loop.
  """
  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param)

  with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
    input_meta_data = json.loads(reader.read().decode('utf-8'))
  label_type = LABEL_TYPES_MAP[input_meta_data.get('label_type', 'int')]
  include_sample_weights = input_meta_data.get('has_sample_weights', False)

  if not FLAGS.model_dir:
    FLAGS.model_dir = '/tmp/bert20/'

  bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.mode == 'export_only':
    export_classifier(FLAGS.model_export_path, input_meta_data, bert_config,
                      FLAGS.model_dir)
    return

  strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=FLAGS.distribution_strategy,
      num_gpus=FLAGS.num_gpus,
      tpu_address=FLAGS.tpu)
  eval_input_fn = get_dataset_fn(
      FLAGS.eval_data_path,
      input_meta_data['max_seq_length'],
      FLAGS.eval_batch_size,
      is_training=False,
      label_type=label_type,
      include_sample_weights=include_sample_weights)

  if FLAGS.mode == 'predict':
    num_labels = input_meta_data.get('num_labels', 1)
    with strategy.scope():
      classifier_model = bert_models.classifier_model(
          bert_config, num_labels)[0]
      checkpoint = tf.train.Checkpoint(model=classifier_model)
      latest_checkpoint_file = (
          FLAGS.predict_checkpoint_path or
          tf.train.latest_checkpoint(FLAGS.model_dir))
      assert latest_checkpoint_file
      logging.info('Checkpoint file %s found and restoring from '
                   'checkpoint', latest_checkpoint_file)
      checkpoint.restore(
          latest_checkpoint_file).assert_existing_objects_matched()
      preds, _ = get_predictions_and_labels(
          strategy,
          classifier_model,
          eval_input_fn,
          is_regression=(num_labels == 1),
          return_probs=True)
    output_predict_file = os.path.join(FLAGS.model_dir, 'test_results.tsv')
    with tf.io.gfile.GFile(output_predict_file, 'w') as writer:
      logging.info('***** Predict results *****')
      for probabilities in preds:
        output_line = '\t'.join(
            str(class_probability)
            for class_probability in probabilities) + '\n'
        writer.write(output_line)
    return

  if FLAGS.mode != 'train_and_eval':
    raise ValueError('Unsupported mode is specified: %s' % FLAGS.mode)

  train_input_fn = get_dataset_fn(
      FLAGS.train_data_path,
      input_meta_data['max_seq_length'],
      FLAGS.train_batch_size,
      is_training=True,
      label_type=label_type,
      include_sample_weights=include_sample_weights,
      num_samples=FLAGS.train_data_size)
  run_bert(
      strategy,
      input_meta_data,
      bert_config,
      train_input_fn,
      eval_input_fn,
      custom_callbacks=custom_callbacks,
      custom_metrics=custom_metrics)
def main(_):
  custom_main(custom_callbacks=None, custom_metrics=None)


if __name__ == '__main__':
  flags.mark_flag_as_required('bert_config_file')
  flags.mark_flag_as_required('input_meta_data_path')
  flags.mark_flag_as_required('model_dir')
  app.run(main)
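Because `custom_main` exposes `custom_callbacks` and `custom_metrics`, a downstream runner can reuse this script without forking it. A minimal sketch, assuming metrics are passed as a list of zero-argument callables (which is how `run_bert_classifier` consumes them); the wrapper names below are illustrative:

# Sketch: wrapping custom_main from a downstream runner module.
from absl import app
from absl import flags
import tensorflow as tf


def my_metric():
  return tf.keras.metrics.SparseCategoricalAccuracy('accuracy',
                                                    dtype=tf.float32)


def my_main(_):
  custom_main(custom_callbacks=[tf.keras.callbacks.TerminateOnNaN()],
              custom_metrics=[my_metric])


if __name__ == '__main__':
  flags.mark_flag_as_required('bert_config_file')
  flags.mark_flag_as_required('input_meta_data_path')
  flags.mark_flag_as_required('model_dir')
  app.run(my_main)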
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/run_pretraining.py
0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run masked LM/next sentence pre-training for BERT in TF 2.x."""
# Import libraries
from absl import app
from absl import flags
from absl import logging
import gin
import tensorflow as tf
from official.common import distribute_utils
from official.modeling import performance
from official.nlp import optimization
from official.nlp.bert import bert_models
from official.nlp.bert import common_flags
from official.nlp.bert import configs
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_training_utils

flags.DEFINE_string('input_files', None,
                    'File path to retrieve training data for pre-training.')
# Model training specific flags.
flags.DEFINE_integer(
    'max_seq_length', 128,
    'The maximum total input sequence length after WordPiece tokenization. '
    'Sequences longer than this will be truncated, and sequences shorter '
    'than this will be padded.')
flags.DEFINE_integer('max_predictions_per_seq', 20,
                     'Maximum predictions per sequence_output.')
flags.DEFINE_integer('train_batch_size', 32, 'Total batch size for training.')
flags.DEFINE_integer('num_steps_per_epoch', 1000,
                     'Total number of training steps to run per epoch.')
flags.DEFINE_float('warmup_steps', 10000,
                   'Warmup steps for Adam weight decay optimizer.')
flags.DEFINE_bool('use_next_sentence_label', True,
                  'Whether to use next sentence label to compute final loss.')
flags.DEFINE_integer('train_summary_interval', 0, 'Step interval for training '
                     'summaries. If the value is a negative number, '
                     'then training summaries are not enabled.')

common_flags.define_common_bert_flags()

FLAGS = flags.FLAGS
def get_pretrain_dataset_fn(input_file_pattern, seq_length,
                            max_predictions_per_seq, global_batch_size,
                            use_next_sentence_label=True):
  """Returns input dataset from input file string."""

  def _dataset_fn(ctx=None):
    """Returns tf.data.Dataset for distributed BERT pretraining."""
    input_patterns = input_file_pattern.split(',')
    batch_size = ctx.get_per_replica_batch_size(global_batch_size)
    train_dataset = input_pipeline.create_pretrain_dataset(
        input_patterns,
        seq_length,
        max_predictions_per_seq,
        batch_size,
        is_training=True,
        input_pipeline_context=ctx,
        use_next_sentence_label=use_next_sentence_label)
    return train_dataset

  return _dataset_fn
def get_loss_fn():
  """Returns loss function for BERT pretraining."""

  def _bert_pretrain_loss_fn(unused_labels, losses, **unused_args):
    return tf.reduce_mean(losses)

  return _bert_pretrain_loss_fn
def run_customized_training(strategy,
                            bert_config,
                            init_checkpoint,
                            max_seq_length,
                            max_predictions_per_seq,
                            model_dir,
                            steps_per_epoch,
                            steps_per_loop,
                            epochs,
                            initial_lr,
                            warmup_steps,
                            end_lr,
                            optimizer_type,
                            input_files,
                            train_batch_size,
                            use_next_sentence_label=True,
                            train_summary_interval=0,
                            custom_callbacks=None,
                            explicit_allreduce=False,
                            pre_allreduce_callbacks=None,
                            post_allreduce_callbacks=None,
                            allreduce_bytes_per_pack=0):
  """Run BERT pretrain model training using low-level API."""

  train_input_fn = get_pretrain_dataset_fn(input_files, max_seq_length,
                                           max_predictions_per_seq,
                                           train_batch_size,
                                           use_next_sentence_label)

  def _get_pretrain_model():
    """Gets a pretraining model."""
    pretrain_model, core_model = bert_models.pretrain_model(
        bert_config, max_seq_length, max_predictions_per_seq,
        use_next_sentence_label=use_next_sentence_label)
    optimizer = optimization.create_optimizer(
        initial_lr, steps_per_epoch * epochs, warmup_steps, end_lr,
        optimizer_type)
    pretrain_model.optimizer = performance.configure_optimizer(
        optimizer,
        use_float16=common_flags.use_float16(),
        use_graph_rewrite=common_flags.use_graph_rewrite())
    return pretrain_model, core_model

  trained_model = model_training_utils.run_customized_training_loop(
      strategy=strategy,
      model_fn=_get_pretrain_model,
      loss_fn=get_loss_fn(),
      scale_loss=FLAGS.scale_loss,
      model_dir=model_dir,
      init_checkpoint=init_checkpoint,
      train_input_fn=train_input_fn,
      steps_per_epoch=steps_per_epoch,
      steps_per_loop=steps_per_loop,
      epochs=epochs,
      sub_model_export_name='pretrained/bert_model',
      explicit_allreduce=explicit_allreduce,
      pre_allreduce_callbacks=pre_allreduce_callbacks,
      post_allreduce_callbacks=post_allreduce_callbacks,
      allreduce_bytes_per_pack=allreduce_bytes_per_pack,
      train_summary_interval=train_summary_interval,
      custom_callbacks=custom_callbacks)

  return trained_model
def run_bert_pretrain(strategy, custom_callbacks=None):
  """Runs BERT pre-training."""
  bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  if not strategy:
    raise ValueError('Distribution strategy is not specified.')

  # Runs customized training loop.
  logging.info('Training using customized training loop TF 2.0 with '
               'distributed strategy.')

  performance.set_mixed_precision_policy(common_flags.dtype())

  # post_allreduce_callbacks and allreduce_bytes_per_pack take effect only
  # when explicit_allreduce = True. In that case optimizer.apply_gradients()
  # no longer implicitly allreduces gradients; users manually allreduce
  # gradients and pass the allreduced grads_and_vars to apply_gradients().
  # With explicit_allreduce = True, clip_by_global_norm is moved to after
  # allreduce.
  return run_customized_training(
      strategy,
      bert_config,
      FLAGS.init_checkpoint,  # Used to initialize only the BERT submodel.
      FLAGS.max_seq_length,
      FLAGS.max_predictions_per_seq,
      FLAGS.model_dir,
      FLAGS.num_steps_per_epoch,
      FLAGS.steps_per_loop,
      FLAGS.num_train_epochs,
      FLAGS.learning_rate,
      FLAGS.warmup_steps,
      FLAGS.end_lr,
      FLAGS.optimizer_type,
      FLAGS.input_files,
      FLAGS.train_batch_size,
      FLAGS.use_next_sentence_label,
      FLAGS.train_summary_interval,
      custom_callbacks=custom_callbacks,
      explicit_allreduce=FLAGS.explicit_allreduce,
      pre_allreduce_callbacks=[
          model_training_utils.clip_by_global_norm_callback
      ],
      allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack)
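# Illustrative sketch (editor's addition): a pre-allreduce callback receives
# the per-replica (gradient, variable) pairs before cross-replica allreduce
# and returns a transformed list. The real clip_by_global_norm_callback
# lives in model_training_utils; the standalone version below only mirrors
# its intent, and the clip norm of 1.0 is an assumption.
#
# import tensorflow as tf
#
# def clip_grads_before_allreduce(grads_and_vars):
#   grads, variables = zip(*grads_and_vars)
#   clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
#   return zip(clipped_grads, variables)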
def main(_):
  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param)
  if not FLAGS.model_dir:
    FLAGS.model_dir = '/tmp/bert20/'
  # Configures cluster spec for multi-worker distribution strategy.
  if FLAGS.num_gpus > 0:
    _ = distribute_utils.configure_cluster(FLAGS.worker_hosts,
                                           FLAGS.task_index)
  strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=FLAGS.distribution_strategy,
      num_gpus=FLAGS.num_gpus,
      all_reduce_alg=FLAGS.all_reduce_alg,
      tpu_address=FLAGS.tpu)
  if strategy:
    print('***** Number of cores used: ', strategy.num_replicas_in_sync)

  run_bert_pretrain(strategy)


if __name__ == '__main__':
  app.run(main)
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/run_squad.py
0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run BERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x."""
import json
import os
import time

# Import libraries
from absl import app
from absl import flags
from absl import logging
import gin
import tensorflow as tf
from official.common import distribute_utils
from official.nlp.bert import configs as bert_configs
from official.nlp.bert import run_squad_helper
from official.nlp.bert import tokenization
from official.nlp.data import squad_lib as squad_lib_wp
from official.utils.misc import keras_utils

flags.DEFINE_string('vocab_file', None,
                    'The vocabulary file that the BERT model was trained on.')

# More flags can be found in run_squad_helper.
run_squad_helper.define_common_squad_flags()

FLAGS = flags.FLAGS
def train_squad(strategy,
                input_meta_data,
                custom_callbacks=None,
                run_eagerly=False,
                init_checkpoint=None,
                sub_model_export_name=None):
  """Runs BERT SQuAD training."""
  bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  init_checkpoint = init_checkpoint or FLAGS.init_checkpoint
  run_squad_helper.train_squad(strategy, input_meta_data, bert_config,
                               custom_callbacks, run_eagerly, init_checkpoint,
                               sub_model_export_name=sub_model_export_name)


def predict_squad(strategy, input_meta_data):
  """Makes predictions for the SQuAD dataset."""
  bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
  run_squad_helper.predict_squad(strategy, input_meta_data, tokenizer,
                                 bert_config, squad_lib_wp)


def eval_squad(strategy, input_meta_data):
  """Evaluates on the SQuAD dataset."""
  bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
  eval_metrics = run_squad_helper.eval_squad(strategy, input_meta_data,
                                             tokenizer, bert_config,
                                             squad_lib_wp)
  return eval_metrics
def export_squad(model_export_path, input_meta_data):
  """Exports a trained model as a `SavedModel` for inference.

  Args:
    model_export_path: a string specifying the path to the SavedModel
      directory.
    input_meta_data: dictionary containing meta data about input and model.

  Raises:
    Export path is not specified, got an empty string or None.
  """
  bert_config = bert_configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  run_squad_helper.export_squad(model_export_path, input_meta_data,
                                bert_config)
def main(_):
  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param)
  with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
    input_meta_data = json.loads(reader.read().decode('utf-8'))

  if FLAGS.mode == 'export_only':
    export_squad(FLAGS.model_export_path, input_meta_data)
    return

  # Configures cluster spec for multi-worker distribution strategy.
  if FLAGS.num_gpus > 0:
    _ = distribute_utils.configure_cluster(FLAGS.worker_hosts,
                                           FLAGS.task_index)
  strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=FLAGS.distribution_strategy,
      num_gpus=FLAGS.num_gpus,
      all_reduce_alg=FLAGS.all_reduce_alg,
      tpu_address=FLAGS.tpu)

  if 'train' in FLAGS.mode:
    if FLAGS.log_steps:
      custom_callbacks = [
          keras_utils.TimeHistory(
              batch_size=FLAGS.train_batch_size,
              log_steps=FLAGS.log_steps,
              logdir=FLAGS.model_dir,
          )
      ]
    else:
      custom_callbacks = None

    train_squad(
        strategy,
        input_meta_data,
        custom_callbacks=custom_callbacks,
        run_eagerly=FLAGS.run_eagerly,
        sub_model_export_name=FLAGS.sub_model_export_name,
    )
  if 'predict' in FLAGS.mode:
    predict_squad(strategy, input_meta_data)
  if 'eval' in FLAGS.mode:
    eval_metrics = eval_squad(strategy, input_meta_data)
    f1_score = eval_metrics['final_f1']
    logging.info('SQuAD eval F1-score: %f', f1_score)
    summary_dir = os.path.join(FLAGS.model_dir, 'summaries', 'eval')
    summary_writer = tf.summary.create_file_writer(summary_dir)
    with summary_writer.as_default():
      # TODO(lehou): write to the correct step number.
      tf.summary.scalar('F1-score', f1_score, step=0)
      summary_writer.flush()
    # Also write eval_metrics to json file.
    squad_lib_wp.write_to_json_files(
        eval_metrics, os.path.join(summary_dir, 'eval_metrics.json'))
    time.sleep(60)


if __name__ == '__main__':
  flags.mark_flag_as_required('bert_config_file')
  flags.mark_flag_as_required('model_dir')
  app.run(main)
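The eval branch above writes `eval_metrics` next to the TensorBoard summary under `<model_dir>/summaries/eval/eval_metrics.json`. A sketch of reading it back, assuming `write_to_json_files` stores plain JSON (the model_dir path below is an example):

# Sketch: reading the SQuAD eval metrics written above.
import json
import os

import tensorflow as tf

path = os.path.join('/tmp/squad_model', 'summaries', 'eval',
                    'eval_metrics.json')
with tf.io.gfile.GFile(path, 'r') as f:
  metrics = json.loads(f.read())
print(metrics['final_f1'])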
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/run_squad_helper.py
0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Library for running BERT family models on SQuAD 1.1/2.0 in TF 2.x."""
import collections
import json
import os

from absl import flags
from absl import logging
import tensorflow as tf

from official.modeling import performance
from official.nlp import optimization
from official.nlp.bert import bert_models
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.nlp.bert import model_training_utils
from official.nlp.bert import squad_evaluate_v1_1
from official.nlp.bert import squad_evaluate_v2_0
from official.nlp.data import squad_lib_sp
from official.utils.misc import keras_utils
def define_common_squad_flags():
  """Defines common flags used by SQuAD tasks."""
  flags.DEFINE_enum(
      'mode', 'train_and_eval',
      ['train_and_eval', 'train_and_predict',
       'train', 'eval', 'predict', 'export_only'],
      'One of {"train_and_eval", "train_and_predict", '
      '"train", "eval", "predict", "export_only"}. '
      '`train_and_eval`: train & predict to json files & compute eval metrics. '
      '`train_and_predict`: train & predict to json files. '
      '`train`: only trains the model. '
      '`eval`: predict answers from squad json file & compute eval metrics. '
      '`predict`: predict answers from the squad json file. '
      '`export_only`: will take the latest checkpoint inside '
      'model_dir and export a `SavedModel`.')
  flags.DEFINE_string('train_data_path', '',
                      'Training data path with train tfrecords.')
  flags.DEFINE_string(
      'input_meta_data_path', None,
      'Path to file that contains meta data about input '
      'to be used for training and evaluation.')
  # Model training specific flags.
  flags.DEFINE_integer('train_batch_size', 32,
                       'Total batch size for training.')
  # Predict processing related.
  flags.DEFINE_string(
      'predict_file', None,
      'SQuAD prediction json file path. '
      '`predict` mode supports multiple files: one can use '
      'wildcard to specify multiple files and it can also be '
      'multiple file patterns separated by comma. Note that '
      '`eval` mode only supports a single predict file.')
  flags.DEFINE_bool(
      'do_lower_case', True,
      'Whether to lower case the input text. Should be True for uncased '
      'models and False for cased models.')
  flags.DEFINE_float(
      'null_score_diff_threshold', 0.0,
      'If null_score - best_non_null is greater than the threshold, '
      'predict null. This is only used for SQuAD v2.')
  flags.DEFINE_bool(
      'verbose_logging', False,
      'If true, all of the warnings related to data processing will be '
      'printed. A number of warnings are expected for a normal SQuAD '
      'evaluation.')
  flags.DEFINE_integer('predict_batch_size', 8,
                       'Total batch size for prediction.')
  flags.DEFINE_integer(
      'n_best_size', 20,
      'The total number of n-best predictions to generate in the '
      'nbest_predictions.json output file.')
  flags.DEFINE_integer(
      'max_answer_length', 30,
      'The maximum length of an answer that can be generated. This is needed '
      'because the start and end predictions are not conditioned on one '
      'another.')

  common_flags.define_common_bert_flags()


FLAGS = flags.FLAGS
def squad_loss_fn(start_positions, end_positions, start_logits, end_logits):
  """Returns sparse categorical crossentropy for start/end logits."""
  start_loss = tf.keras.losses.sparse_categorical_crossentropy(
      start_positions, start_logits, from_logits=True)
  end_loss = tf.keras.losses.sparse_categorical_crossentropy(
      end_positions, end_logits, from_logits=True)

  total_loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2
  return total_loss
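# Illustrative sketch (editor's addition): the loss is the plain average of
# two sparse categorical crossentropies, one over answer-start indices and
# one over answer-end indices. A toy batch of two examples with a 4-token
# context, where positions index into the logits' sequence dimension:
#
# import tensorflow as tf
#
# start_logits = tf.constant([[2.0, 0.1, 0.1, 0.1], [0.1, 2.0, 0.1, 0.1]])
# end_logits = tf.constant([[0.1, 2.0, 0.1, 0.1], [0.1, 0.1, 2.0, 0.1]])
# start_positions = tf.constant([0, 1])
# end_positions = tf.constant([1, 2])
# loss = squad_loss_fn(start_positions, end_positions,
#                      start_logits, end_logits)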
def get_loss_fn():
  """Gets a loss function for squad task."""

  def _loss_fn(labels, model_outputs):
    start_positions = labels['start_positions']
    end_positions = labels['end_positions']
    start_logits, end_logits = model_outputs
    return squad_loss_fn(start_positions, end_positions, start_logits,
                         end_logits)

  return _loss_fn
RawResult = collections.namedtuple('RawResult',
                                   ['unique_id', 'start_logits', 'end_logits'])


def get_raw_results(predictions):
  """Converts multi-replica predictions to RawResult."""
  for unique_ids, start_logits, end_logits in zip(predictions['unique_ids'],
                                                  predictions['start_logits'],
                                                  predictions['end_logits']):
    for values in zip(unique_ids.numpy(), start_logits.numpy(),
                      end_logits.numpy()):
      yield RawResult(
          unique_id=values[0],
          start_logits=values[1].tolist(),
          end_logits=values[2].tolist())
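# Illustrative sketch (editor's addition): get_raw_results expects the
# per-host structure produced by experimental_local_results, i.e. each dict
# value is a list of per-replica tensors. A single-replica fake batch shows
# the expected shape (eager mode assumed, since .numpy() is called):
#
# import tensorflow as tf
#
# fake_predictions = {
#     'unique_ids': [tf.constant([1, 2])],
#     'start_logits': [tf.constant([[0.1, 0.9], [0.8, 0.2]])],
#     'end_logits': [tf.constant([[0.3, 0.7], [0.6, 0.4]])],
# }
# results = list(get_raw_results(fake_predictions))
# # results[0].unique_id == 1; the logits fields are plain Python lists.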
def get_dataset_fn(input_file_pattern, max_seq_length, global_batch_size,
                   is_training):
  """Gets a closure to create a dataset."""

  def _dataset_fn(ctx=None):
    """Returns tf.data.Dataset for distributed SQuAD training."""
    batch_size = ctx.get_per_replica_batch_size(
        global_batch_size) if ctx else global_batch_size
    dataset = input_pipeline.create_squad_dataset(
        input_file_pattern,
        max_seq_length,
        batch_size,
        is_training=is_training,
        input_pipeline_context=ctx)
    return dataset

  return _dataset_fn
def get_squad_model_to_predict(strategy, bert_config, checkpoint_path,
                               input_meta_data):
  """Gets a squad model to make predictions."""
  with strategy.scope():
    # Prediction always uses float32, even if training uses mixed precision.
    tf.keras.mixed_precision.set_global_policy('float32')
    squad_model, _ = bert_models.squad_model(
        bert_config,
        input_meta_data['max_seq_length'],
        hub_module_url=FLAGS.hub_module_url)

  if checkpoint_path is None:
    checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)
  logging.info('Restoring checkpoints from %s', checkpoint_path)
  checkpoint = tf.train.Checkpoint(model=squad_model)
  checkpoint.restore(checkpoint_path).expect_partial()
  return squad_model
def predict_squad_customized(strategy, input_meta_data, predict_tfrecord_path,
                             num_steps, squad_model):
  """Makes predictions using a BERT-based SQuAD model."""
  predict_dataset_fn = get_dataset_fn(
      predict_tfrecord_path,
      input_meta_data['max_seq_length'],
      FLAGS.predict_batch_size,
      is_training=False)
  predict_iterator = iter(
      strategy.distribute_datasets_from_function(predict_dataset_fn))

  @tf.function
  def predict_step(iterator):
    """Predicts on distributed devices."""

    def _replicated_step(inputs):
      """Replicated prediction calculation."""
      x, _ = inputs
      unique_ids = x.pop('unique_ids')
      start_logits, end_logits = squad_model(x, training=False)
      return dict(
          unique_ids=unique_ids,
          start_logits=start_logits,
          end_logits=end_logits)

    outputs = strategy.run(_replicated_step, args=(next(iterator),))
    return tf.nest.map_structure(strategy.experimental_local_results, outputs)

  all_results = []
  for _ in range(num_steps):
    predictions = predict_step(predict_iterator)
    for result in get_raw_results(predictions):
      all_results.append(result)
    if len(all_results) % 100 == 0:
      logging.info('Made predictions for %d records.', len(all_results))
  return all_results
def train_squad(strategy,
                input_meta_data,
                bert_config,
                custom_callbacks=None,
                run_eagerly=False,
                init_checkpoint=None,
                sub_model_export_name=None):
  """Runs BERT SQuAD training."""
  if strategy:
    logging.info('Training using customized training loop with distribution'
                 ' strategy.')

  # Enables XLA in Session Config. Should not be set for TPU.
  keras_utils.set_session_config(FLAGS.enable_xla)
  performance.set_mixed_precision_policy(common_flags.dtype())

  epochs = FLAGS.num_train_epochs
  num_train_examples = input_meta_data['train_data_size']
  max_seq_length = input_meta_data['max_seq_length']
  steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
  warmup_steps = int(epochs * num_train_examples * 0.1 /
                     FLAGS.train_batch_size)
  train_input_fn = get_dataset_fn(
      FLAGS.train_data_path,
      max_seq_length,
      FLAGS.train_batch_size,
      is_training=True)

  def _get_squad_model():
    """Get Squad model and optimizer."""
    squad_model, core_model = bert_models.squad_model(
        bert_config,
        max_seq_length,
        hub_module_url=FLAGS.hub_module_url,
        hub_module_trainable=FLAGS.hub_module_trainable)
    optimizer = optimization.create_optimizer(FLAGS.learning_rate,
                                              steps_per_epoch * epochs,
                                              warmup_steps, FLAGS.end_lr,
                                              FLAGS.optimizer_type)

    squad_model.optimizer = performance.configure_optimizer(
        optimizer,
        use_float16=common_flags.use_float16(),
        use_graph_rewrite=common_flags.use_graph_rewrite())
    return squad_model, core_model

  # post_allreduce_callbacks and allreduce_bytes_per_pack take effect only
  # when explicit_allreduce = True. In that case optimizer.apply_gradients()
  # no longer implicitly allreduces gradients; users manually allreduce
  # gradients and pass the allreduced grads_and_vars to apply_gradients().
  # With explicit_allreduce = True, clip_by_global_norm is moved to after
  # allreduce.
  model_training_utils.run_customized_training_loop(
      strategy=strategy,
      model_fn=_get_squad_model,
      loss_fn=get_loss_fn(),
      model_dir=FLAGS.model_dir,
      steps_per_epoch=steps_per_epoch,
      steps_per_loop=FLAGS.steps_per_loop,
      epochs=epochs,
      train_input_fn=train_input_fn,
      init_checkpoint=init_checkpoint or FLAGS.init_checkpoint,
      sub_model_export_name=sub_model_export_name,
      run_eagerly=run_eagerly,
      custom_callbacks=custom_callbacks,
      explicit_allreduce=FLAGS.explicit_allreduce,
      pre_allreduce_callbacks=[
          model_training_utils.clip_by_global_norm_callback
      ],
      allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack)
def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib,
                            predict_file, squad_model):
  """Makes predictions for a squad dataset."""
  doc_stride = input_meta_data['doc_stride']
  max_query_length = input_meta_data['max_query_length']
  # Whether data should be in Ver 2.0 format.
  version_2_with_negative = input_meta_data.get('version_2_with_negative',
                                                False)
  eval_examples = squad_lib.read_squad_examples(
      input_file=predict_file,
      is_training=False,
      version_2_with_negative=version_2_with_negative)

  eval_writer = squad_lib.FeatureWriter(
      filename=os.path.join(FLAGS.model_dir, 'eval.tf_record'),
      is_training=False)
  eval_features = []

  def _append_feature(feature, is_padding):
    if not is_padding:
      eval_features.append(feature)
    eval_writer.process_feature(feature)

  # TPU requires a fixed batch size for all batches, therefore the number
  # of examples must be a multiple of the batch size, or else examples
  # will get dropped. So we pad with fake examples which are ignored
  # later on.
  kwargs = dict(
      examples=eval_examples,
      tokenizer=tokenizer,
      max_seq_length=input_meta_data['max_seq_length'],
      doc_stride=doc_stride,
      max_query_length=max_query_length,
      is_training=False,
      output_fn=_append_feature,
      batch_size=FLAGS.predict_batch_size)

  # squad_lib_sp requires one more argument 'do_lower_case'.
  if squad_lib == squad_lib_sp:
    kwargs['do_lower_case'] = FLAGS.do_lower_case
  dataset_size = squad_lib.convert_examples_to_features(**kwargs)
  eval_writer.close()

  logging.info('***** Running predictions *****')
  logging.info('  Num orig examples = %d', len(eval_examples))
  logging.info('  Num split examples = %d', len(eval_features))
  logging.info('  Batch size = %d', FLAGS.predict_batch_size)

  num_steps = int(dataset_size / FLAGS.predict_batch_size)
  all_results = predict_squad_customized(strategy, input_meta_data,
                                         eval_writer.filename, num_steps,
                                         squad_model)

  all_predictions, all_nbest_json, scores_diff_json = (
      squad_lib.postprocess_output(
          eval_examples,
          eval_features,
          all_results,
          FLAGS.n_best_size,
          FLAGS.max_answer_length,
          FLAGS.do_lower_case,
          version_2_with_negative=version_2_with_negative,
          null_score_diff_threshold=FLAGS.null_score_diff_threshold,
          verbose=FLAGS.verbose_logging))

  return all_predictions, all_nbest_json, scores_diff_json
def dump_to_files(all_predictions,
                  all_nbest_json,
                  scores_diff_json,
                  squad_lib,
                  version_2_with_negative,
                  file_prefix=''):
  """Save output to json files."""
  output_prediction_file = os.path.join(FLAGS.model_dir,
                                        '%spredictions.json' % file_prefix)
  output_nbest_file = os.path.join(FLAGS.model_dir,
                                   '%snbest_predictions.json' % file_prefix)
  output_null_log_odds_file = os.path.join(FLAGS.model_dir, file_prefix,
                                           '%snull_odds.json' % file_prefix)
  logging.info('Writing predictions to: %s', (output_prediction_file))
  logging.info('Writing nbest to: %s', (output_nbest_file))

  squad_lib.write_to_json_files(all_predictions, output_prediction_file)
  squad_lib.write_to_json_files(all_nbest_json, output_nbest_file)
  if version_2_with_negative:
    squad_lib.write_to_json_files(scores_diff_json, output_null_log_odds_file)
def _get_matched_files(input_path):
  """Returns all files that match the input_path."""
  input_patterns = input_path.strip().split(',')
  all_matched_files = []
  for input_pattern in input_patterns:
    input_pattern = input_pattern.strip()
    if not input_pattern:
      continue
    matched_files = tf.io.gfile.glob(input_pattern)
    if not matched_files:
      raise ValueError('%s does not match any files.' % input_pattern)
    else:
      all_matched_files.extend(matched_files)

  return sorted(all_matched_files)
def predict_squad(strategy,
                  input_meta_data,
                  tokenizer,
                  bert_config,
                  squad_lib,
                  init_checkpoint=None):
  """Gets prediction results and writes them to disk."""
  if init_checkpoint is None:
    init_checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)

  all_predict_files = _get_matched_files(FLAGS.predict_file)
  squad_model = get_squad_model_to_predict(strategy, bert_config,
                                           init_checkpoint, input_meta_data)
  for idx, predict_file in enumerate(all_predict_files):
    all_predictions, all_nbest_json, scores_diff_json = (
        prediction_output_squad(strategy, input_meta_data, tokenizer,
                                squad_lib, predict_file, squad_model))
    if len(all_predict_files) == 1:
      file_prefix = ''
    else:
      # if predict_file is /path/xquad.ar.json, the `file_prefix` may be
      # "xquad.ar-0-"
      file_prefix = '%s-' % os.path.splitext(
          os.path.basename(all_predict_files[idx]))[0]
    dump_to_files(all_predictions, all_nbest_json, scores_diff_json, squad_lib,
                  input_meta_data.get('version_2_with_negative', False),
                  file_prefix)
def eval_squad(strategy,
               input_meta_data,
               tokenizer,
               bert_config,
               squad_lib,
               init_checkpoint=None):
  """Gets prediction results and evaluates them against ground truth."""
  if init_checkpoint is None:
    init_checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)

  all_predict_files = _get_matched_files(FLAGS.predict_file)
  if len(all_predict_files) != 1:
    raise ValueError('`eval_squad` only supports one predict file, '
                     'but got %s' % all_predict_files)

  squad_model = get_squad_model_to_predict(strategy, bert_config,
                                           init_checkpoint, input_meta_data)
  all_predictions, all_nbest_json, scores_diff_json = (
      prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib,
                              all_predict_files[0], squad_model))
  dump_to_files(all_predictions, all_nbest_json, scores_diff_json, squad_lib,
                input_meta_data.get('version_2_with_negative', False))

  with tf.io.gfile.GFile(FLAGS.predict_file, 'r') as reader:
    dataset_json = json.load(reader)
    pred_dataset = dataset_json['data']
  if input_meta_data.get('version_2_with_negative', False):
    eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset, all_predictions,
                                                scores_diff_json)
  else:
    eval_metrics = squad_evaluate_v1_1.evaluate(pred_dataset, all_predictions)
  return eval_metrics
def
export_squad
(
model_export_path
,
input_meta_data
,
bert_config
):
"""Exports a trained model as a `SavedModel` for inference.
Args:
model_export_path: a string specifying the path to the SavedModel directory.
input_meta_data: dictionary containing meta data about input and model.
bert_config: Bert configuration file to define core bert layers.
Raises:
Export path is not specified, got an empty string or None.
"""
if
not
model_export_path
:
raise
ValueError
(
'Export path is not specified: %s'
%
model_export_path
)
# Export uses float32 for now, even if training uses mixed precision.
tf
.
keras
.
mixed_precision
.
set_global_policy
(
'float32'
)
squad_model
,
_
=
bert_models
.
squad_model
(
bert_config
,
input_meta_data
[
'max_seq_length'
])
model_saving_utils
.
export_bert_model
(
model_export_path
,
model
=
squad_model
,
checkpoint_dir
=
FLAGS
.
model_dir
)
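
# Usage sketch (hypothetical call; `strategy`, `input_meta_data`, `tokenizer`,
# `bert_config` and `squad_lib` all come from the surrounding run_squad.py
# driver, and the export path below is made up for illustration):
#
#   eval_metrics = eval_squad(strategy, input_meta_data, tokenizer,
#                             bert_config, squad_lib)
#   export_squad('/tmp/squad_saved_model', input_meta_data, bert_config)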
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/serving.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Examples of SavedModel export for tf-serving."""
from absl import app
from absl import flags
import tensorflow as tf

from official.nlp.bert import bert_models
from official.nlp.bert import configs

flags.DEFINE_integer(
    "sequence_length", None, "Sequence length to parse the tf.Example. If "
    "sequence_length > 0, add a signature for serialized "
    "tf.Example and define the parsing specification by the "
    "sequence_length.")
flags.DEFINE_string("bert_config_file", None,
                    "Bert configuration file to define core bert layers.")
flags.DEFINE_string("model_checkpoint_path", None,
                    "File path to TF model checkpoint.")
flags.DEFINE_string("export_path", None,
                    "Destination folder to export the serving SavedModel.")

FLAGS = flags.FLAGS


class BertServing(tf.keras.Model):
  """Bert transformer encoder model for serving."""

  def __init__(self, bert_config, name_to_features=None, name="serving_model"):
    super(BertServing, self).__init__(name=name)
    self.encoder = bert_models.get_transformer_encoder(
        bert_config, sequence_length=None)
    self.name_to_features = name_to_features

  def call(self, inputs):
    input_word_ids = inputs["input_ids"]
    input_mask = inputs["input_mask"]
    input_type_ids = inputs["segment_ids"]

    encoder_outputs, _ = self.encoder(
        [input_word_ids, input_mask, input_type_ids])
    return encoder_outputs

  def serve_body(self, input_ids, input_mask=None, segment_ids=None):
    if segment_ids is None:
      # Requires that the CLS token is the first token of the inputs.
      segment_ids = tf.zeros_like(input_ids)
    if input_mask is None:
      # The mask has 1 for real tokens and 0 for padding tokens.
      input_mask = tf.where(
          tf.equal(input_ids, 0), tf.zeros_like(input_ids),
          tf.ones_like(input_ids))

    inputs = dict(
        input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids)
    return self.call(inputs)

  @tf.function
  def serve(self, input_ids, input_mask=None, segment_ids=None):
    outputs = self.serve_body(input_ids, input_mask, segment_ids)
    # Returns a dictionary to control SignatureDef output signature.
    return {"outputs": outputs[-1]}

  @tf.function
  def serve_examples(self, inputs):
    features = tf.io.parse_example(inputs, self.name_to_features)
    for key in list(features.keys()):
      t = features[key]
      if t.dtype == tf.int64:
        t = tf.cast(t, tf.int32)
      features[key] = t
    return self.serve(
        features["input_ids"],
        input_mask=features["input_mask"] if "input_mask" in features else None,
        segment_ids=features["segment_ids"]
        if "segment_ids" in features else None)

  @classmethod
  def export(cls, model, export_dir):
    if not isinstance(model, cls):
      raise ValueError("Invalid model instance: %s, it should be a %s" %
                       (model, cls))
    signatures = {
        "serving_default":
            model.serve.get_concrete_function(
                input_ids=tf.TensorSpec(
                    shape=[None, None], dtype=tf.int32, name="inputs")),
    }
    if model.name_to_features:
      signatures[
          "serving_examples"] = model.serve_examples.get_concrete_function(
              tf.TensorSpec(shape=[None], dtype=tf.string, name="examples"))
    tf.saved_model.save(model, export_dir=export_dir, signatures=signatures)


def main(_):
  sequence_length = FLAGS.sequence_length
  if sequence_length is not None and sequence_length > 0:
    name_to_features = {
        "input_ids": tf.io.FixedLenFeature([sequence_length], tf.int64),
        "input_mask": tf.io.FixedLenFeature([sequence_length], tf.int64),
        "segment_ids": tf.io.FixedLenFeature([sequence_length], tf.int64),
    }
  else:
    name_to_features = None
  bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  serving_model = BertServing(
      bert_config=bert_config, name_to_features=name_to_features)
  checkpoint = tf.train.Checkpoint(model=serving_model.encoder)
  checkpoint.restore(FLAGS.model_checkpoint_path
                    ).assert_existing_objects_matched().run_restore_ops()
  BertServing.export(serving_model, FLAGS.export_path)


if __name__ == "__main__":
  flags.mark_flag_as_required("bert_config_file")
  flags.mark_flag_as_required("model_checkpoint_path")
  flags.mark_flag_as_required("export_path")
  app.run(main)
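
# Example invocation (hypothetical paths, shown only as a sketch):
#
#   python serving.py \
#     --bert_config_file=/tmp/bert/bert_config.json \
#     --model_checkpoint_path=/tmp/bert/bert_model.ckpt \
#     --export_path=/tmp/bert_serving \
#     --sequence_length=128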
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/squad_evaluate_v1_1.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation of SQuAD predictions (version 1.1).
The functions are copied from
https://worksheets.codalab.org/rest/bundles/0xbcd57bee090b421c982906709c8c27e1/contents/blob/.
The SQuAD dataset is described in this paper:
SQuAD: 100,000+ Questions for Machine Comprehension of Text
Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, Percy Liang
https://nlp.stanford.edu/pubs/rajpurkar2016squad.pdf
"""
import collections
import re
import string

# pylint: disable=g-bad-import-order
from absl import logging
# pylint: enable=g-bad-import-order


def _normalize_answer(s):
  """Lowercases text and removes punctuation, articles and extra whitespace."""

  def remove_articles(text):
    return re.sub(r"\b(a|an|the)\b", " ", text)

  def white_space_fix(text):
    return " ".join(text.split())

  def remove_punc(text):
    exclude = set(string.punctuation)
    return "".join(ch for ch in text if ch not in exclude)

  def lower(text):
    return text.lower()

  return white_space_fix(remove_articles(remove_punc(lower(s))))


def _f1_score(prediction, ground_truth):
  """Computes F1 score by comparing prediction to ground truth."""
  prediction_tokens = _normalize_answer(prediction).split()
  ground_truth_tokens = _normalize_answer(ground_truth).split()
  prediction_counter = collections.Counter(prediction_tokens)
  ground_truth_counter = collections.Counter(ground_truth_tokens)
  common = prediction_counter & ground_truth_counter
  num_same = sum(common.values())
  if num_same == 0:
    return 0
  precision = 1.0 * num_same / len(prediction_tokens)
  recall = 1.0 * num_same / len(ground_truth_tokens)
  f1 = (2 * precision * recall) / (precision + recall)
  return f1


def _exact_match_score(prediction, ground_truth):
  """Checks if predicted answer exactly matches ground truth answer."""
  return _normalize_answer(prediction) == _normalize_answer(ground_truth)


def _metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
  """Computes the max over all metric scores."""
  scores_for_ground_truths = []
  for ground_truth in ground_truths:
    score = metric_fn(prediction, ground_truth)
    scores_for_ground_truths.append(score)
  return max(scores_for_ground_truths)


def evaluate(dataset, predictions):
  """Evaluates predictions for a dataset."""
  f1 = exact_match = total = 0
  for article in dataset:
    for paragraph in article["paragraphs"]:
      for qa in paragraph["qas"]:
        total += 1
        if qa["id"] not in predictions:
          message = ("Unanswered question " + qa["id"] +
                     " will receive score 0.")
          logging.error(message)
          continue
        ground_truths = [entry["text"] for entry in qa["answers"]]
        prediction = predictions[qa["id"]]
        exact_match += _metric_max_over_ground_truths(_exact_match_score,
                                                      prediction,
                                                      ground_truths)
        f1 += _metric_max_over_ground_truths(_f1_score, prediction,
                                             ground_truths)

  exact_match = exact_match / total
  f1 = f1 / total

  return {"exact_match": exact_match, "final_f1": f1}
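
# Minimal usage sketch (hypothetical file name and question id; the scores are
# returned as fractions in [0, 1]):
#
#   import json
#   with open("dev-v1.1.json") as f:
#     dataset = json.load(f)["data"]
#   predictions = {"56be4db0acb8001400a502ec": "Denver Broncos"}
#   print(evaluate(dataset, predictions))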
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/squad_evaluate_v2_0.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation script for SQuAD version 2.0.
The functions are copied and modified from
https://raw.githubusercontent.com/white127/SQUAD-2.0-bidaf/master/evaluate-v2.0.py
In addition to basic functionality, we also compute additional statistics and
plot precision-recall curves if an additional na_prob.json file is provided.
This file is expected to map question ID's to the model's predicted probability
that a question is unanswerable.
"""
import collections
import re
import string

from absl import logging


def _make_qid_to_has_ans(dataset):
  qid_to_has_ans = {}
  for article in dataset:
    for p in article['paragraphs']:
      for qa in p['qas']:
        qid_to_has_ans[qa['id']] = bool(qa['answers'])
  return qid_to_has_ans


def _normalize_answer(s):
  """Lowercases text and removes punctuation, articles and extra whitespace."""

  def remove_articles(text):
    regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
    return re.sub(regex, ' ', text)

  def white_space_fix(text):
    return ' '.join(text.split())

  def remove_punc(text):
    exclude = set(string.punctuation)
    return ''.join(ch for ch in text if ch not in exclude)

  def lower(text):
    return text.lower()

  return white_space_fix(remove_articles(remove_punc(lower(s))))


def _get_tokens(s):
  if not s:
    return []
  return _normalize_answer(s).split()


def _compute_exact(a_gold, a_pred):
  return int(_normalize_answer(a_gold) == _normalize_answer(a_pred))


def _compute_f1(a_gold, a_pred):
  """Computes F1-score."""
  gold_toks = _get_tokens(a_gold)
  pred_toks = _get_tokens(a_pred)
  common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
  num_same = sum(common.values())
  if not gold_toks or not pred_toks:
    # If either is no-answer, then F1 is 1 if they agree, 0 otherwise.
    return int(gold_toks == pred_toks)
  if num_same == 0:
    return 0
  precision = 1.0 * num_same / len(pred_toks)
  recall = 1.0 * num_same / len(gold_toks)
  f1 = (2 * precision * recall) / (precision + recall)
  return f1
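
# Worked example: _compute_f1('the cat sat', 'cat sat down'). After
# normalization the gold tokens are ['cat', 'sat'] (the article is removed)
# and the predicted tokens are ['cat', 'sat', 'down'], so num_same = 2,
# precision = 2/3, recall = 2/2 = 1.0, and
# F1 = 2 * (2/3) * 1.0 / (2/3 + 1.0) = 0.8.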
def _get_raw_scores(dataset, predictions):
  """Computes raw scores."""
  exact_scores = {}
  f1_scores = {}
  for article in dataset:
    for p in article['paragraphs']:
      for qa in p['qas']:
        qid = qa['id']
        gold_answers = [
            a['text'] for a in qa['answers'] if _normalize_answer(a['text'])
        ]
        if not gold_answers:
          # For unanswerable questions, the only correct answer is the empty
          # string.
          gold_answers = ['']
        if qid not in predictions:
          logging.error('Missing prediction for %s', qid)
          continue
        a_pred = predictions[qid]
        # Take max over all gold answers.
        exact_scores[qid] = max(_compute_exact(a, a_pred) for a in gold_answers)
        f1_scores[qid] = max(_compute_f1(a, a_pred) for a in gold_answers)
  return exact_scores, f1_scores


def _apply_no_ans_threshold(scores, na_probs, qid_to_has_ans,
                            na_prob_thresh=1.0):
  new_scores = {}
  for qid, s in scores.items():
    pred_na = na_probs[qid] > na_prob_thresh
    if pred_na:
      new_scores[qid] = float(not qid_to_has_ans[qid])
    else:
      new_scores[qid] = s
  return new_scores
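
# For example, with na_prob_thresh=0.5, a question whose na_probs value is 0.7
# is treated as predicted-unanswerable: its score becomes 1.0 if the question
# truly has no answer and 0.0 if it does. Questions at or below the threshold
# keep their raw exact/F1 score.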
def _make_eval_dict(exact_scores, f1_scores, qid_list=None):
  """Makes the evaluation result dictionary."""
  if not qid_list:
    total = len(exact_scores)
    return collections.OrderedDict([
        ('exact', 100.0 * sum(exact_scores.values()) / total),
        ('f1', 100.0 * sum(f1_scores.values()) / total),
        ('total', total),
    ])
  else:
    total = len(qid_list)
    return collections.OrderedDict([
        ('exact', 100.0 * sum(exact_scores[k] for k in qid_list) / total),
        ('f1', 100.0 * sum(f1_scores[k] for k in qid_list) / total),
        ('total', total),
    ])


def _merge_eval(main_eval, new_eval, prefix):
  for k in new_eval:
    main_eval['%s_%s' % (prefix, k)] = new_eval[k]


def _make_precision_recall_eval(scores, na_probs, num_true_pos,
                                qid_to_has_ans):
  """Makes an evaluation dictionary containing average precision/recall."""
  qid_list = sorted(na_probs, key=lambda k: na_probs[k])
  true_pos = 0.0
  cur_p = 1.0
  cur_r = 0.0
  precisions = [1.0]
  recalls = [0.0]
  avg_prec = 0.0
  for i, qid in enumerate(qid_list):
    if qid_to_has_ans[qid]:
      true_pos += scores[qid]
    cur_p = true_pos / float(i + 1)
    cur_r = true_pos / float(num_true_pos)
    if i == len(qid_list) - 1 or na_probs[qid] != na_probs[qid_list[i + 1]]:
      # i.e., if we can put a threshold after this point
      avg_prec += cur_p * (cur_r - recalls[-1])
      precisions.append(cur_p)
      recalls.append(cur_r)
  return {'ap': 100.0 * avg_prec}


def _run_precision_recall_analysis(main_eval, exact_raw, f1_raw, na_probs,
                                   qid_to_has_ans):
  """Runs precision/recall analysis and merges the results into `main_eval`."""
  num_true_pos = sum(1 for v in qid_to_has_ans.values() if v)
  if num_true_pos == 0:
    return
  pr_exact = _make_precision_recall_eval(exact_raw, na_probs, num_true_pos,
                                         qid_to_has_ans)
  pr_f1 = _make_precision_recall_eval(f1_raw, na_probs, num_true_pos,
                                      qid_to_has_ans)
  oracle_scores = {k: float(v) for k, v in qid_to_has_ans.items()}
  pr_oracle = _make_precision_recall_eval(oracle_scores, na_probs,
                                          num_true_pos, qid_to_has_ans)
  _merge_eval(main_eval, pr_exact, 'pr_exact')
  _merge_eval(main_eval, pr_f1, 'pr_f1')
  _merge_eval(main_eval, pr_oracle, 'pr_oracle')


def _find_best_thresh(predictions, scores, na_probs, qid_to_has_ans):
  """Finds the best threshold for the no-answer probability."""
  num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k])
  cur_score = num_no_ans
  best_score = cur_score
  best_thresh = 0.0
  qid_list = sorted(na_probs, key=lambda k: na_probs[k])
  for qid in qid_list:
    if qid not in scores:
      continue
    if qid_to_has_ans[qid]:
      diff = scores[qid]
    else:
      if predictions[qid]:
        diff = -1
      else:
        diff = 0
    cur_score += diff
    if cur_score > best_score:
      best_score = cur_score
      best_thresh = na_probs[qid]
  return 100.0 * best_score / len(scores), best_thresh


def _find_all_best_thresh(main_eval, predictions, exact_raw, f1_raw, na_probs,
                          qid_to_has_ans):
  best_exact, exact_thresh = _find_best_thresh(predictions, exact_raw,
                                               na_probs, qid_to_has_ans)
  best_f1, f1_thresh = _find_best_thresh(predictions, f1_raw, na_probs,
                                         qid_to_has_ans)
  main_eval['final_exact'] = best_exact
  main_eval['final_exact_thresh'] = exact_thresh
  main_eval['final_f1'] = best_f1
  main_eval['final_f1_thresh'] = f1_thresh


def evaluate(dataset, predictions, na_probs=None):
  """Evaluates prediction results."""
  new_orig_data = []
  for article in dataset:
    for p in article['paragraphs']:
      for qa in p['qas']:
        if qa['id'] in predictions:
          new_para = {'qas': [qa]}
          new_article = {'paragraphs': [new_para]}
          new_orig_data.append(new_article)
  dataset = new_orig_data

  if na_probs is None:
    na_probs = {k: 0.0 for k in predictions}
  qid_to_has_ans = _make_qid_to_has_ans(dataset)  # maps qid to True/False
  has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
  no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
  exact_raw, f1_raw = _get_raw_scores(dataset, predictions)
  exact_thresh = _apply_no_ans_threshold(exact_raw, na_probs, qid_to_has_ans)
  f1_thresh = _apply_no_ans_threshold(f1_raw, na_probs, qid_to_has_ans)
  out_eval = _make_eval_dict(exact_thresh, f1_thresh)
  if has_ans_qids:
    has_ans_eval = _make_eval_dict(
        exact_thresh, f1_thresh, qid_list=has_ans_qids)
    _merge_eval(out_eval, has_ans_eval, 'HasAns')
  if no_ans_qids:
    no_ans_eval = _make_eval_dict(exact_thresh, f1_thresh, qid_list=no_ans_qids)
    _merge_eval(out_eval, no_ans_eval, 'NoAns')
  _find_all_best_thresh(out_eval, predictions, exact_raw, f1_raw, na_probs,
                        qid_to_has_ans)
  _run_precision_recall_analysis(out_eval, exact_raw, f1_raw, na_probs,
                                 qid_to_has_ans)
  return out_eval
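
# Usage sketch (hypothetical variables; `na_probs` holds the model's no-answer
# scores, e.g. the `scores_diff_json` written by the SQuAD prediction code):
#
#   metrics = evaluate(dataset, predictions, na_probs=scores_diff_json)
#   print(metrics['exact'], metrics['f1'], metrics['final_f1'])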
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/tf1_checkpoint_converter_lib.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Convert checkpoints created by Estimator (tf1) to be Keras compatible."""

import numpy as np
import tensorflow.compat.v1 as tf  # TF 1.x

# Mapping between old <=> new names. The source pattern in the original
# variable name will be replaced by the destination pattern.
BERT_NAME_REPLACEMENTS = (
    ("bert", "bert_model"),
    ("embeddings/word_embeddings", "word_embeddings/embeddings"),
    ("embeddings/token_type_embeddings",
     "embedding_postprocessor/type_embeddings"),
    ("embeddings/position_embeddings",
     "embedding_postprocessor/position_embeddings"),
    ("embeddings/LayerNorm", "embedding_postprocessor/layer_norm"),
    ("attention/self", "self_attention"),
    ("attention/output/dense", "self_attention_output"),
    ("attention/output/LayerNorm", "self_attention_layer_norm"),
    ("intermediate/dense", "intermediate"),
    ("output/dense", "output"),
    ("output/LayerNorm", "output_layer_norm"),
    ("pooler/dense", "pooler_transform"),
)

BERT_V2_NAME_REPLACEMENTS = (
    ("bert/", ""),
    ("encoder", "transformer"),
    ("embeddings/word_embeddings", "word_embeddings/embeddings"),
    ("embeddings/token_type_embeddings", "type_embeddings/embeddings"),
    ("embeddings/position_embeddings", "position_embedding/embeddings"),
    ("embeddings/LayerNorm", "embeddings/layer_norm"),
    ("attention/self", "self_attention"),
    ("attention/output/dense", "self_attention/attention_output"),
    ("attention/output/LayerNorm", "self_attention_layer_norm"),
    ("intermediate/dense", "intermediate"),
    ("output/dense", "output"),
    ("output/LayerNorm", "output_layer_norm"),
    ("pooler/dense", "pooler_transform"),
    ("cls/predictions", "bert/cls/predictions"),
    ("cls/predictions/output_bias", "cls/predictions/output_bias/bias"),
    ("cls/seq_relationship/output_bias", "predictions/transform/logits/bias"),
    ("cls/seq_relationship/output_weights",
     "predictions/transform/logits/kernel"),
)

BERT_PERMUTATIONS = ()

BERT_V2_PERMUTATIONS = (("cls/seq_relationship/output_weights", (1, 0)),)


def _bert_name_replacement(var_name, name_replacements):
  """Gets the variable name replacement."""
  for src_pattern, tgt_pattern in name_replacements:
    if src_pattern in var_name:
      old_var_name = var_name
      var_name = var_name.replace(src_pattern, tgt_pattern)
      tf.logging.info("Converted: %s --> %s", old_var_name, var_name)
  return var_name


def _has_exclude_patterns(name, exclude_patterns):
  """Checks if a string contains substrings that match patterns to exclude."""
  for p in exclude_patterns:
    if p in name:
      return True
  return False


def _get_permutation(name, permutations):
  """Checks whether a variable requires transposition by pattern matching."""
  for src_pattern, permutation in permutations:
    if src_pattern in name:
      tf.logging.info("Permuted: %s --> %s", name, permutation)
      return permutation
  return None


def _get_new_shape(name, shape, num_heads):
  """Checks whether a variable requires reshape by pattern matching."""
  if "self_attention/attention_output/kernel" in name:
    return tuple([num_heads, shape[0] // num_heads, shape[1]])
  if "self_attention/attention_output/bias" in name:
    return shape

  patterns = [
      "self_attention/query", "self_attention/value", "self_attention/key"
  ]
  for pattern in patterns:
    if pattern in name:
      if "kernel" in name:
        return tuple([shape[0], num_heads, shape[1] // num_heads])
      if "bias" in name:
        return tuple([num_heads, shape[0] // num_heads])
  return None
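
# Worked example: for BERT-base (hidden size 768, num_heads 12), a query
# kernel named ".../self_attention/query/kernel" with shape (768, 768) maps
# to (768, 12, 64), and an attention-output kernel with shape (768, 768) maps
# to (12, 64, 768), matching the per-head layout the Keras attention layers
# expect.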
def create_v2_checkpoint(model,
                         src_checkpoint,
                         output_path,
                         checkpoint_model_name="model"):
  """Converts a name-based matched TF V1 checkpoint to a TF V2 checkpoint."""
  # Uses streaming-restore in eager mode to read V1 name-based checkpoints.
  model.load_weights(src_checkpoint).assert_existing_objects_matched()
  if hasattr(model, "checkpoint_items"):
    checkpoint_items = model.checkpoint_items
  else:
    checkpoint_items = {}
  checkpoint_items[checkpoint_model_name] = model
  checkpoint = tf.train.Checkpoint(**checkpoint_items)
  checkpoint.save(output_path)


def convert(checkpoint_from_path,
            checkpoint_to_path,
            num_heads,
            name_replacements,
            permutations,
            exclude_patterns=None):
  """Migrates the names of variables within a checkpoint.

  Args:
    checkpoint_from_path: Path to source checkpoint to be read in.
    checkpoint_to_path: Path to checkpoint to be written out.
    num_heads: The number of heads of the model.
    name_replacements: A list of tuples of the form (match_str, replace_str)
      describing variable names to adjust.
    permutations: A list of tuples of the form (match_str, permutation)
      describing permutations to apply to given variables. Note that match_str
      should match the original variable name, not the replaced one.
    exclude_patterns: A list of string patterns to exclude variables from
      checkpoint conversion.

  Returns:
    A dictionary that maps the new variable names to the Variable objects.
    A dictionary that maps the old variable names to the new variable names.
  """
  with tf.Graph().as_default():
    tf.logging.info("Reading checkpoint_from_path %s", checkpoint_from_path)
    reader = tf.train.NewCheckpointReader(checkpoint_from_path)
    name_shape_map = reader.get_variable_to_shape_map()
    new_variable_map = {}
    conversion_map = {}
    for var_name in name_shape_map:
      if exclude_patterns and _has_exclude_patterns(var_name, exclude_patterns):
        continue
      # Get the original tensor data.
      tensor = reader.get_tensor(var_name)

      # Look up the new variable name, if any.
      new_var_name = _bert_name_replacement(var_name, name_replacements)

      # See if we need to reshape the underlying tensor.
      new_shape = None
      if num_heads > 0:
        new_shape = _get_new_shape(new_var_name, tensor.shape, num_heads)
      if new_shape:
        tf.logging.info("Variable %s has a shape change from %s to %s",
                        var_name, tensor.shape, new_shape)
        tensor = np.reshape(tensor, new_shape)

      # See if we need to permute the underlying tensor.
      permutation = _get_permutation(var_name, permutations)
      if permutation:
        tensor = np.transpose(tensor, permutation)

      # Create a new variable with the possibly-reshaped or transposed tensor.
      var = tf.Variable(tensor, name=var_name)

      # Save the variable into the new variable map.
      new_variable_map[new_var_name] = var

      # Keep a list of converted variables for sanity checking.
      if new_var_name != var_name:
        conversion_map[var_name] = new_var_name

    saver = tf.train.Saver(new_variable_map)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      tf.logging.info("Writing checkpoint_to_path %s", checkpoint_to_path)
      saver.save(sess, checkpoint_to_path, write_meta_graph=False)

  tf.logging.info("Summary:")
  tf.logging.info("  Converted %d variable name(s).", len(new_variable_map))
  tf.logging.info("  Converted: %s", str(conversion_map))
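
# Usage sketch (hypothetical checkpoint paths; num_heads must match the
# source model):
#
#   convert(checkpoint_from_path="/tmp/bert_v1/bert_model.ckpt",
#           checkpoint_to_path="/tmp/bert_v1_renamed/ckpt",
#           num_heads=12,
#           name_replacements=BERT_V2_NAME_REPLACEMENTS,
#           permutations=BERT_V2_PERMUTATIONS,
#           exclude_patterns=["adam", "Adam"])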
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/tf2_encoder_checkpoint_converter.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A converter from a V1 BERT encoder checkpoint to a V2 encoder checkpoint.
The conversion will yield an object-oriented checkpoint that can be used
to restore a BertEncoder or BertPretrainerV2 object (see the `converted_model`
FLAG below).
"""
import os

from absl import app
from absl import flags
import tensorflow as tf

from official.modeling import tf_utils
from official.nlp.bert import configs
from official.nlp.bert import tf1_checkpoint_converter_lib
from official.nlp.modeling import models
from official.nlp.modeling import networks

FLAGS = flags.FLAGS

flags.DEFINE_string("bert_config_file", None,
                    "Bert configuration file to define core bert layers.")
flags.DEFINE_string(
    "checkpoint_to_convert", None,
    "Initial checkpoint from a pretrained BERT model core (that is, only the "
    "BertModel, with no task heads.)")
flags.DEFINE_string("converted_checkpoint_path", None,
                    "Name for the created object-based V2 checkpoint.")
flags.DEFINE_string(
    "checkpoint_model_name", "encoder",
    "The name of the model when saving the checkpoint, i.e., "
    "the checkpoint will be saved using: "
    "tf.train.Checkpoint(FLAGS.checkpoint_model_name=model).")
flags.DEFINE_enum(
    "converted_model", "encoder", ["encoder", "pretrainer"],
    "Whether to convert the checkpoint to a `BertEncoder` model or a "
    "`BertPretrainerV2` model (with mlm but without classification heads).")


def _create_bert_model(cfg):
  """Creates a BERT keras core model from BERT configuration.

  Args:
    cfg: A `BertConfig` to create the core model.

  Returns:
    A BertEncoder network.
  """
  bert_encoder = networks.BertEncoder(
      vocab_size=cfg.vocab_size,
      hidden_size=cfg.hidden_size,
      num_layers=cfg.num_hidden_layers,
      num_attention_heads=cfg.num_attention_heads,
      intermediate_size=cfg.intermediate_size,
      activation=tf_utils.get_activation(cfg.hidden_act),
      dropout_rate=cfg.hidden_dropout_prob,
      attention_dropout_rate=cfg.attention_probs_dropout_prob,
      max_sequence_length=cfg.max_position_embeddings,
      type_vocab_size=cfg.type_vocab_size,
      initializer=tf.keras.initializers.TruncatedNormal(
          stddev=cfg.initializer_range),
      embedding_width=cfg.embedding_size)
  return bert_encoder


def _create_bert_pretrainer_model(cfg):
  """Creates a BERT pretrainer model from BERT configuration.

  Args:
    cfg: A `BertConfig` to create the core model.

  Returns:
    A BertPretrainerV2 model.
  """
  bert_encoder = _create_bert_model(cfg)
  pretrainer = models.BertPretrainerV2(
      encoder_network=bert_encoder,
      mlm_activation=tf_utils.get_activation(cfg.hidden_act),
      mlm_initializer=tf.keras.initializers.TruncatedNormal(
          stddev=cfg.initializer_range))
  # Makes sure the pretrainer variables are created.
  _ = pretrainer(pretrainer.inputs)
  return pretrainer


def convert_checkpoint(bert_config,
                       output_path,
                       v1_checkpoint,
                       checkpoint_model_name="model",
                       converted_model="encoder"):
  """Converts a V1 checkpoint into an OO V2 checkpoint."""
  output_dir, _ = os.path.split(output_path)
  tf.io.gfile.makedirs(output_dir)

  # Create a temporary V1 name-converted checkpoint in the output directory.
  temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1")
  temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt")

  tf1_checkpoint_converter_lib.convert(
      checkpoint_from_path=v1_checkpoint,
      checkpoint_to_path=temporary_checkpoint,
      num_heads=bert_config.num_attention_heads,
      name_replacements=tf1_checkpoint_converter_lib.BERT_V2_NAME_REPLACEMENTS,
      permutations=tf1_checkpoint_converter_lib.BERT_V2_PERMUTATIONS,
      exclude_patterns=["adam", "Adam"])

  if converted_model == "encoder":
    model = _create_bert_model(bert_config)
  elif converted_model == "pretrainer":
    model = _create_bert_pretrainer_model(bert_config)
  else:
    raise ValueError("Unsupported converted_model: %s" % converted_model)

  # Create a V2 checkpoint from the temporary checkpoint.
  tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint,
                                                    output_path,
                                                    checkpoint_model_name)

  # Clean up the temporary checkpoint, if it exists.
  try:
    tf.io.gfile.rmtree(temporary_checkpoint_dir)
  except tf.errors.OpError:
    # If it doesn't exist, we don't need to clean it up; continue.
    pass


def main(argv):
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")

  output_path = FLAGS.converted_checkpoint_path
  v1_checkpoint = FLAGS.checkpoint_to_convert
  checkpoint_model_name = FLAGS.checkpoint_model_name
  converted_model = FLAGS.converted_model
  bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
  convert_checkpoint(
      bert_config=bert_config,
      output_path=output_path,
      v1_checkpoint=v1_checkpoint,
      checkpoint_model_name=checkpoint_model_name,
      converted_model=converted_model)


if __name__ == "__main__":
  app.run(main)
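
# Example invocation (hypothetical paths, shown only as a sketch):
#
#   python tf2_encoder_checkpoint_converter.py \
#     --bert_config_file=/tmp/bert_v1/bert_config.json \
#     --checkpoint_to_convert=/tmp/bert_v1/bert_model.ckpt \
#     --converted_checkpoint_path=/tmp/bert_v2/ckpt \
#     --converted_model=encoder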
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/tokenization.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding=utf-8
"""Tokenization classes implementation.
The file is forked from:
https://github.com/google-research/bert/blob/master/tokenization.py.
"""
import collections
import re
import unicodedata

import six
import tensorflow as tf

import sentencepiece as spm

SPIECE_UNDERLINE = "▁"


def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
  """Checks whether the casing config is consistent with the checkpoint name."""

  # The casing has to be passed in by the user and there is no explicit check
  # as to whether it matches the checkpoint. The casing information probably
  # should have been stored in the bert_config.json file, but it's not, so
  # we have to heuristically detect it to validate.

  if not init_checkpoint:
    return

  m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
  if m is None:
    return

  model_name = m.group(1)

  lower_models = [
      "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
      "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
  ]

  cased_models = [
      "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
      "multi_cased_L-12_H-768_A-12"
  ]

  is_bad_config = False
  if model_name in lower_models and not do_lower_case:
    is_bad_config = True
    actual_flag = "False"
    case_name = "lowercased"
    opposite_flag = "True"

  if model_name in cased_models and do_lower_case:
    is_bad_config = True
    actual_flag = "True"
    case_name = "cased"
    opposite_flag = "False"

  if is_bad_config:
    raise ValueError(
        "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
        "However, `%s` seems to be a %s model, so you "
        "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
        "how the model was pre-trained. If this error is wrong, please "
        "just comment out this check." %
        (actual_flag, init_checkpoint, model_name, case_name, opposite_flag))


def convert_to_unicode(text):
  """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
  if six.PY3:
    if isinstance(text, str):
      return text
    elif isinstance(text, bytes):
      return text.decode("utf-8", "ignore")
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  elif six.PY2:
    if isinstance(text, str):
      return text.decode("utf-8", "ignore")
    elif isinstance(text, unicode):
      return text
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  else:
    raise ValueError("Not running on Python 2 or Python 3?")


def printable_text(text):
  """Returns text encoded in a way suitable for print or `tf.logging`."""
  # These functions want `str` for both Python2 and Python3, but in one case
  # it's a Unicode string and in the other it's a byte string.
  if six.PY3:
    if isinstance(text, str):
      return text
    elif isinstance(text, bytes):
      return text.decode("utf-8", "ignore")
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  elif six.PY2:
    if isinstance(text, str):
      return text
    elif isinstance(text, unicode):
      return text.encode("utf-8")
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  else:
    raise ValueError("Not running on Python 2 or Python 3?")


def load_vocab(vocab_file):
  """Loads a vocabulary file into a dictionary."""
  vocab = collections.OrderedDict()
  index = 0
  with tf.io.gfile.GFile(vocab_file, "r") as reader:
    while True:
      token = convert_to_unicode(reader.readline())
      if not token:
        break
      token = token.strip()
      vocab[token] = index
      index += 1
  return vocab


def convert_by_vocab(vocab, items):
  """Converts a sequence of [tokens|ids] using the vocab."""
  output = []
  for item in items:
    output.append(vocab[item])
  return output


def convert_tokens_to_ids(vocab, tokens):
  return convert_by_vocab(vocab, tokens)


def convert_ids_to_tokens(inv_vocab, ids):
  return convert_by_vocab(inv_vocab, ids)


def whitespace_tokenize(text):
  """Runs basic whitespace cleaning and splitting on a piece of text."""
  text = text.strip()
  if not text:
    return []
  tokens = text.split()
  return tokens


class FullTokenizer(object):
  """Runs end-to-end tokenization."""

  def __init__(self, vocab_file, do_lower_case=True, split_on_punc=True):
    self.vocab = load_vocab(vocab_file)
    self.inv_vocab = {v: k for k, v in self.vocab.items()}
    self.basic_tokenizer = BasicTokenizer(
        do_lower_case=do_lower_case, split_on_punc=split_on_punc)
    self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

  def tokenize(self, text):
    split_tokens = []
    for token in self.basic_tokenizer.tokenize(text):
      for sub_token in self.wordpiece_tokenizer.tokenize(token):
        split_tokens.append(sub_token)

    return split_tokens

  def convert_tokens_to_ids(self, tokens):
    return convert_by_vocab(self.vocab, tokens)

  def convert_ids_to_tokens(self, ids):
    return convert_by_vocab(self.inv_vocab, ids)
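
# Usage sketch (hypothetical vocab path; the resulting pieces depend on the
# vocabulary file):
#
#   tokenizer = FullTokenizer("/tmp/vocab.txt", do_lower_case=True)
#   tokens = tokenizer.tokenize("unwanted running")
#   ids = tokenizer.convert_tokens_to_ids(tokens)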
class BasicTokenizer(object):
  """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

  def __init__(self, do_lower_case=True, split_on_punc=True):
    """Constructs a BasicTokenizer.

    Args:
      do_lower_case: Whether to lower case the input.
      split_on_punc: Whether to apply split on punctuations. By default BERT
        starts a new token for punctuations. This makes detokenization
        difficult for tasks like seq2seq decoding.
    """
    self.do_lower_case = do_lower_case
    self.split_on_punc = split_on_punc

  def tokenize(self, text):
    """Tokenizes a piece of text."""
    text = convert_to_unicode(text)
    text = self._clean_text(text)

    # This was added on November 1st, 2018 for the multilingual and Chinese
    # models. This is also applied to the English models now, but it doesn't
    # matter since the English models were not trained on any Chinese data
    # and generally don't have any Chinese data in them (there are Chinese
    # characters in the vocabulary because the English Wikipedia does contain
    # some Chinese words).
    text = self._tokenize_chinese_chars(text)

    orig_tokens = whitespace_tokenize(text)
    split_tokens = []
    for token in orig_tokens:
      if self.do_lower_case:
        token = token.lower()
        token = self._run_strip_accents(token)
      if self.split_on_punc:
        split_tokens.extend(self._run_split_on_punc(token))
      else:
        split_tokens.append(token)

    output_tokens = whitespace_tokenize(" ".join(split_tokens))
    return output_tokens

  def _run_strip_accents(self, text):
    """Strips accents from a piece of text."""
    text = unicodedata.normalize("NFD", text)
    output = []
    for char in text:
      cat = unicodedata.category(char)
      if cat == "Mn":
        continue
      output.append(char)
    return "".join(output)

  def _run_split_on_punc(self, text):
    """Splits punctuation on a piece of text."""
    chars = list(text)
    i = 0
    start_new_word = True
    output = []
    while i < len(chars):
      char = chars[i]
      if _is_punctuation(char):
        output.append([char])
        start_new_word = True
      else:
        if start_new_word:
          output.append([])
        start_new_word = False
        output[-1].append(char)
      i += 1

    return ["".join(x) for x in output]

  def _tokenize_chinese_chars(self, text):
    """Adds whitespace around any CJK character."""
    output = []
    for char in text:
      cp = ord(char)
      if self._is_chinese_char(cp):
        output.append(" ")
        output.append(char)
        output.append(" ")
      else:
        output.append(char)
    return "".join(output)

  def _is_chinese_char(self, cp):
    """Checks whether `cp` is the codepoint of a CJK character."""
    # This defines a "chinese character" as anything in the CJK Unicode block:
    #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
    #
    # Note that the CJK Unicode block is NOT all Japanese and Korean
    # characters, despite its name. The modern Korean Hangul alphabet is a
    # different block, as are Japanese Hiragana and Katakana. Those alphabets
    # are used to write space-separated words, so they are not treated
    # specially and are handled like all of the other languages.
    if ((cp >= 0x4E00 and cp <= 0x9FFF) or
        (cp >= 0x3400 and cp <= 0x4DBF) or
        (cp >= 0x20000 and cp <= 0x2A6DF) or
        (cp >= 0x2A700 and cp <= 0x2B73F) or
        (cp >= 0x2B740 and cp <= 0x2B81F) or
        (cp >= 0x2B820 and cp <= 0x2CEAF) or
        (cp >= 0xF900 and cp <= 0xFAFF) or
        (cp >= 0x2F800 and cp <= 0x2FA1F)):
      return True

    return False

  def _clean_text(self, text):
    """Performs invalid character removal and whitespace cleanup on text."""
    output = []
    for char in text:
      cp = ord(char)
      if cp == 0 or cp == 0xfffd or _is_control(char):
        continue
      if _is_whitespace(char):
        output.append(" ")
      else:
        output.append(char)
    return "".join(output)


class WordpieceTokenizer(object):
  """Runs WordPiece tokenization."""

  def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=400):
    self.vocab = vocab
    self.unk_token = unk_token
    self.max_input_chars_per_word = max_input_chars_per_word

  def tokenize(self, text):
    """Tokenizes a piece of text into its word pieces.

    This uses a greedy longest-match-first algorithm to perform tokenization
    using the given vocabulary.

    For example:
      input = "unaffable"
      output = ["un", "##aff", "##able"]

    Args:
      text: A single token or whitespace separated tokens. This should have
        already been passed through `BasicTokenizer`.

    Returns:
      A list of wordpiece tokens.
    """
    text = convert_to_unicode(text)

    output_tokens = []
    for token in whitespace_tokenize(text):
      chars = list(token)
      if len(chars) > self.max_input_chars_per_word:
        output_tokens.append(self.unk_token)
        continue

      is_bad = False
      start = 0
      sub_tokens = []
      while start < len(chars):
        end = len(chars)
        cur_substr = None
        while start < end:
          substr = "".join(chars[start:end])
          if start > 0:
            substr = "##" + substr
          if substr in self.vocab:
            cur_substr = substr
            break
          end -= 1
        if cur_substr is None:
          is_bad = True
          break
        sub_tokens.append(cur_substr)
        start = end

      if is_bad:
        output_tokens.append(self.unk_token)
      else:
        output_tokens.extend(sub_tokens)
    return output_tokens


def _is_whitespace(char):
  """Checks whether `char` is a whitespace character."""
  # \t, \n, and \r are technically control characters but we treat them
  # as whitespace since they are generally considered as such.
  if char == " " or char == "\t" or char == "\n" or char == "\r":
    return True
  cat = unicodedata.category(char)
  if cat == "Zs":
    return True
  return False


def _is_control(char):
  """Checks whether `char` is a control character."""
  # These are technically control characters but we count them as whitespace
  # characters.
  if char == "\t" or char == "\n" or char == "\r":
    return False
  cat = unicodedata.category(char)
  if cat in ("Cc", "Cf"):
    return True
  return False


def _is_punctuation(char):
  """Checks whether `char` is a punctuation character."""
  cp = ord(char)
  # We treat all non-letter/number ASCII as punctuation. Characters such as
  # "^", "$", and "`" are not in the Unicode Punctuation class but we treat
  # them as punctuation anyway, for consistency.
  if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
      (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
    return True
  cat = unicodedata.category(char)
  if cat.startswith("P"):
    return True
  return False


def preprocess_text(inputs, remove_space=True, lower=False):
  """Preprocesses data by removing extra spaces and normalizing the text.

  This method is used together with the sentence piece tokenizer and is forked
  from:
  https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py

  Args:
    inputs: The input text.
    remove_space: Whether to remove the extra space.
    lower: Whether to lowercase the text.

  Returns:
    The preprocessed text.
  """
  outputs = inputs
  if remove_space:
    outputs = " ".join(inputs.strip().split())

  if six.PY2 and isinstance(outputs, str):
    try:
      outputs = six.ensure_text(outputs, "utf-8")
    except UnicodeDecodeError:
      outputs = six.ensure_text(outputs, "latin-1")

  outputs = unicodedata.normalize("NFKD", outputs)
  outputs = "".join([c for c in outputs if not unicodedata.combining(c)])
  if lower:
    outputs = outputs.lower()

  return outputs


def encode_pieces(sp_model, text, sample=False):
  """Segments text into pieces.

  This method is used together with the sentence piece tokenizer and is forked
  from:
  https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py

  Args:
    sp_model: A spm.SentencePieceProcessor object.
    text: The input text to be segmented.
    sample: Whether to randomly sample a segmentation output or return a
      deterministic one.

  Returns:
    A list of token pieces.
  """
  if six.PY2 and isinstance(text, six.text_type):
    text = six.ensure_binary(text, "utf-8")

  if not sample:
    pieces = sp_model.EncodeAsPieces(text)
  else:
    pieces = sp_model.SampleEncodeAsPieces(text, 64, 0.1)
  new_pieces = []
  for piece in pieces:
    piece = printable_text(piece)
    if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
      cur_pieces = sp_model.EncodeAsPieces(piece[:-1].replace(
          SPIECE_UNDERLINE, ""))
      if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
        if len(cur_pieces[0]) == 1:
          cur_pieces = cur_pieces[1:]
        else:
          cur_pieces[0] = cur_pieces[0][1:]
      cur_pieces.append(piece[-1])
      new_pieces.extend(cur_pieces)
    else:
      new_pieces.append(piece)

  return new_pieces


def encode_ids(sp_model, text, sample=False):
  """Segments text and returns token ids.

  This method is used together with the sentence piece tokenizer and is forked
  from:
  https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py

  Args:
    sp_model: A spm.SentencePieceProcessor object.
    text: The input text to be segmented.
    sample: Whether to randomly sample a segmentation output or return a
      deterministic one.

  Returns:
    A list of token ids.
  """
  pieces = encode_pieces(sp_model, text, sample=sample)
  ids = [sp_model.PieceToId(piece) for piece in pieces]
  return ids


class FullSentencePieceTokenizer(object):
  """Runs end-to-end sentence piece tokenization.

  The interface of this class is intended to keep the same as the above
  `FullTokenizer` class for easier usage.
  """

  def __init__(self, sp_model_file):
    """Inits FullSentencePieceTokenizer.

    Args:
      sp_model_file: The path to the sentence piece model file.
    """
    self.sp_model = spm.SentencePieceProcessor()
    self.sp_model.Load(sp_model_file)
    self.vocab = {
        self.sp_model.IdToPiece(i): i
        for i in six.moves.range(self.sp_model.GetPieceSize())
    }

  def tokenize(self, text):
    """Tokenizes text into pieces."""
    return encode_pieces(self.sp_model, text)

  def convert_tokens_to_ids(self, tokens):
    """Converts a list of tokens to a list of ids."""
    return [self.sp_model.PieceToId(printable_text(token)) for token in tokens]

  def convert_ids_to_tokens(self, ids):
    """Converts a list of ids to a list of tokens."""
    return [self.sp_model.IdToPiece(id_) for id_ in ids]
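
# Usage sketch (hypothetical model path; the pieces depend on the trained
# SentencePiece model):
#
#   sp_tokenizer = FullSentencePieceTokenizer("/tmp/30k-clean.model")
#   pieces = sp_tokenizer.tokenize("Hello world")
#   ids = sp_tokenizer.convert_tokens_to_ids(pieces)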
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/bert/tokenization_test.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile

import six
import tensorflow as tf

from official.nlp.bert import tokenization


class TokenizationTest(tf.test.TestCase):
  """Tokenization test.

  The implementation is forked from
  https://github.com/google-research/bert/blob/master/tokenization_test.py.
  """

  def test_full_tokenizer(self):
    vocab_tokens = [
        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un",
        "runn", "##ing", ","
    ]
    with tempfile.NamedTemporaryFile(delete=False) as vocab_writer:
      if six.PY2:
        vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
      else:
        vocab_writer.write("".join([x + "\n" for x in vocab_tokens
                                   ]).encode("utf-8"))

      vocab_file = vocab_writer.name

    tokenizer = tokenization.FullTokenizer(vocab_file)
    os.unlink(vocab_file)

    tokens = tokenizer.tokenize(u"UNwant\u00E9d,running")
    self.assertAllEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])

    self.assertAllEqual(
        tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])

  def test_chinese(self):
    tokenizer = tokenization.BasicTokenizer()

    self.assertAllEqual(
        tokenizer.tokenize(u"ah\u535A\u63A8zz"),
        [u"ah", u"\u535A", u"\u63A8", u"zz"])

  def test_basic_tokenizer_lower(self):
    tokenizer = tokenization.BasicTokenizer(do_lower_case=True)

    self.assertAllEqual(
        tokenizer.tokenize(u" \tHeLLo!how  \n Are yoU?  "),
        ["hello", "!", "how", "are", "you", "?"])
    self.assertAllEqual(tokenizer.tokenize(u"H\u00E9llo"), ["hello"])

  def test_basic_tokenizer_no_lower(self):
    tokenizer = tokenization.BasicTokenizer(do_lower_case=False)

    self.assertAllEqual(
        tokenizer.tokenize(u" \tHeLLo!how  \n Are yoU?  "),
        ["HeLLo", "!", "how", "Are", "yoU", "?"])

  def test_basic_tokenizer_no_split_on_punc(self):
    tokenizer = tokenization.BasicTokenizer(
        do_lower_case=True, split_on_punc=False)

    self.assertAllEqual(
        tokenizer.tokenize(u" \tHeLLo!how  \n Are yoU?  "),
        ["hello!how", "are", "you?"])

  def test_wordpiece_tokenizer(self):
    vocab_tokens = [
        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un",
        "runn", "##ing", "##!", "!"
    ]

    vocab = {}
    for (i, token) in enumerate(vocab_tokens):
      vocab[token] = i
    tokenizer = tokenization.WordpieceTokenizer(vocab=vocab)

    self.assertAllEqual(tokenizer.tokenize(""), [])

    self.assertAllEqual(
        tokenizer.tokenize("unwanted running"),
        ["un", "##want", "##ed", "runn", "##ing"])

    self.assertAllEqual(
        tokenizer.tokenize("unwanted running !"),
        ["un", "##want", "##ed", "runn", "##ing", "!"])

    self.assertAllEqual(
        tokenizer.tokenize("unwanted running!"),
        ["un", "##want", "##ed", "runn", "##ing", "##!"])

    self.assertAllEqual(
        tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"])

  def test_convert_tokens_to_ids(self):
    vocab_tokens = [
        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un",
        "runn", "##ing"
    ]

    vocab = {}
    for (i, token) in enumerate(vocab_tokens):
      vocab[token] = i

    self.assertAllEqual(
        tokenization.convert_tokens_to_ids(
            vocab, ["un", "##want", "##ed", "runn", "##ing"]), [7, 4, 5, 8, 9])

  def test_is_whitespace(self):
    self.assertTrue(tokenization._is_whitespace(u" "))
    self.assertTrue(tokenization._is_whitespace(u"\t"))
    self.assertTrue(tokenization._is_whitespace(u"\r"))
    self.assertTrue(tokenization._is_whitespace(u"\n"))
    self.assertTrue(tokenization._is_whitespace(u"\u00A0"))

    self.assertFalse(tokenization._is_whitespace(u"A"))
    self.assertFalse(tokenization._is_whitespace(u"-"))

  def test_is_control(self):
    self.assertTrue(tokenization._is_control(u"\u0005"))

    self.assertFalse(tokenization._is_control(u"A"))
    self.assertFalse(tokenization._is_control(u" "))
    self.assertFalse(tokenization._is_control(u"\t"))
    self.assertFalse(tokenization._is_control(u"\r"))
    self.assertFalse(tokenization._is_control(u"\U0001F4A9"))

  def test_is_punctuation(self):
    self.assertTrue(tokenization._is_punctuation(u"-"))
    self.assertTrue(tokenization._is_punctuation(u"$"))
    self.assertTrue(tokenization._is_punctuation(u"`"))
    self.assertTrue(tokenization._is_punctuation(u"."))

    self.assertFalse(tokenization._is_punctuation(u"A"))
    self.assertFalse(tokenization._is_punctuation(u" "))


if __name__ == "__main__":
  tf.test.main()
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/configs/__init__.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/nlp/configs/bert.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Multi-head BERT encoder network with classification heads.
Includes configurations and instantiation methods.
"""
from typing import List, Optional, Text

import dataclasses

from official.modeling.hyperparams import base_config
from official.nlp.configs import encoders


@dataclasses.dataclass
class ClsHeadConfig(base_config.Config):
  inner_dim: int = 0
  num_classes: int = 2
  activation: Optional[Text] = "tanh"
  dropout_rate: float = 0.0
  cls_token_idx: int = 0
  name: Optional[Text] = None


@dataclasses.dataclass
class PretrainerConfig(base_config.Config):
  """Pretrainer configuration."""
  encoder: encoders.EncoderConfig = encoders.EncoderConfig()
  cls_heads: List[ClsHeadConfig] = dataclasses.field(default_factory=list)
  mlm_activation: str = "gelu"
  mlm_initializer_range: float = 0.02
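
# Construction sketch (hypothetical values; the encoder falls back to the
# `encoders.EncoderConfig` defaults):
#
#   config = PretrainerConfig(
#       cls_heads=[ClsHeadConfig(inner_dim=768, num_classes=2,
#                                dropout_rate=0.1, name="next_sentence")])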