ModelZoo / ResNet50_tensorflow · Commits

Commit f16a7b5b (unverified)
Authored May 04, 2021 by vedanshu; committed via GitHub on May 04, 2021.

    Merge pull request #1 from tensorflow/master

    new pull

Parents: 8e9296ff, 8f58f396

298 files changed in total; the first 20 files are displayed below, with 432 additions and 316 deletions (+432 / -316).
Files shown in this view:

  official/nlp/bert/configs.py                            +3   -7
  official/nlp/bert/export_tfhub.py                       +60  -16
  official/nlp/bert/export_tfhub_test.py                  +9   -10
  official/nlp/bert/input_pipeline.py                     +28  -11
  official/nlp/bert/model_saving_utils.py                 +10  -19
  official/nlp/bert/model_training_utils.py               +46  -29
  official/nlp/bert/model_training_utils_test.py          +35  -37
  official/nlp/bert/run_classifier.py                     +40  -21
  official/nlp/bert/run_pretraining.py                    +30  -9
  official/nlp/bert/run_squad.py                          +6   -11
  official/nlp/bert/run_squad_helper.py                   +43  -52
  official/nlp/bert/serving.py                            +7   -8
  official/nlp/bert/squad_evaluate_v1_1.py                +4   -6
  official/nlp/bert/squad_evaluate_v2_0.py                +3   -6
  official/nlp/bert/tf1_checkpoint_converter_lib.py       +14  -8
  official/nlp/bert/tf2_encoder_checkpoint_converter.py   +67  -16
  official/nlp/bert/tokenization.py                       +6   -10
  official/nlp/bert/tokenization_test.py                  +1   -5
  official/nlp/configs/__init__.py                        +13  -0
  official/nlp/configs/bert.py                            +7   -35
official/nlp/bert/configs.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,15 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""The main BERT model and related functions."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+"""The main BERT model and related functions."""
 import copy
 import json

 import six
 import tensorflow as tf
 ...
@@ -105,4 +102,3 @@ class BertConfig(object):
   def to_json_string(self):
     """Serializes this instance to a JSON string."""
     return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
official/nlp/bert/export_tfhub.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,18 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""A script to export the BERT core model as a TF-Hub SavedModel."""
-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import google_type_annotations
-from __future__ import print_function
+"""A script to export BERT as a TF-Hub SavedModel.
+
+This script is **DEPRECATED** for exporting BERT encoder models;
+see the error message in main() for details.
+"""
+from typing import Text

 # Import libraries
 from absl import app
 from absl import flags
 from absl import logging
 import tensorflow as tf
-from typing import Text

 from official.nlp.bert import bert_models
 from official.nlp.bert import configs
 ...
@@ -35,9 +37,12 @@ flags.DEFINE_string("model_checkpoint_path", None,
 flags.DEFINE_string("export_path", None,
                     "TF-Hub SavedModel destination path.")
 flags.DEFINE_string("vocab_file", None,
                     "The vocabulary file that the BERT model was trained on.")
 flags.DEFINE_bool("do_lower_case", None,
                   "Whether to lowercase. If None, "
                   "do_lower_case will be enabled if 'uncased' appears in the "
                   "name of --vocab_file")
+flags.DEFINE_enum("model_type", "encoder", ["encoder", "squad"],
+                  "What kind of BERT model to export.")


 def create_bert_model(bert_config: configs.BertConfig) -> tf.keras.Model:
 ...
@@ -68,8 +73,10 @@ def create_bert_model(bert_config: configs.BertConfig) -> tf.keras.Model:
 def export_bert_tfhub(bert_config: configs.BertConfig,
-                      model_checkpoint_path: Text, hub_destination: Text,
-                      vocab_file: Text, do_lower_case: bool = None):
+                      model_checkpoint_path: Text,
+                      hub_destination: Text,
+                      vocab_file: Text,
+                      do_lower_case: bool = None):
   """Restores a tf.keras.Model and saves for TF-Hub."""
   # If do_lower_case is not explicit, default to checking whether "uncased" is
   # in the vocab file name
 ...
@@ -78,17 +85,54 @@ def export_bert_tfhub(bert_config: configs.BertConfig,
     logging.info("Using do_lower_case=%s based on name of vocab_file=%s",
                  do_lower_case, vocab_file)
   core_model, encoder = create_bert_model(bert_config)
-  checkpoint = tf.train.Checkpoint(model=encoder)
+  checkpoint = tf.train.Checkpoint(
+      model=encoder,  # Legacy checkpoints.
+      encoder=encoder)
   checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched()
   core_model.vocab_file = tf.saved_model.Asset(vocab_file)
   core_model.do_lower_case = tf.Variable(do_lower_case, trainable=False)
   core_model.save(hub_destination, include_optimizer=False, save_format="tf")


+def export_bert_squad_tfhub(bert_config: configs.BertConfig,
+                            model_checkpoint_path: Text,
+                            hub_destination: Text,
+                            vocab_file: Text,
+                            do_lower_case: bool = None):
+  """Restores a tf.keras.Model for BERT with SQuAD and saves for TF-Hub."""
+  # If do_lower_case is not explicit, default to checking whether "uncased" is
+  # in the vocab file name
+  if do_lower_case is None:
+    do_lower_case = "uncased" in vocab_file
+    logging.info("Using do_lower_case=%s based on name of vocab_file=%s",
+                 do_lower_case, vocab_file)
+  span_labeling, _ = bert_models.squad_model(bert_config, max_seq_length=None)
+  checkpoint = tf.train.Checkpoint(model=span_labeling)
+  checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched()
+  span_labeling.vocab_file = tf.saved_model.Asset(vocab_file)
+  span_labeling.do_lower_case = tf.Variable(do_lower_case, trainable=False)
+  span_labeling.save(hub_destination, include_optimizer=False,
+                     save_format="tf")


 def main(_):
   bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
-  export_bert_tfhub(bert_config, FLAGS.model_checkpoint_path,
-                    FLAGS.export_path, FLAGS.vocab_file, FLAGS.do_lower_case)
+  if FLAGS.model_type == "encoder":
+    deprecation_note = (
+        "nlp/bert/export_tfhub is **DEPRECATED** for exporting BERT encoder "
+        "models. Please switch to nlp/tools/export_tfhub for exporting BERT "
+        "(and other) encoders with dict inputs/outputs conforming to "
+        "https://www.tensorflow.org/hub/common_saved_model_apis/text#transformer-encoders"
+    )
+    logging.error(deprecation_note)
+    print("\n\nNOTICE:", deprecation_note, "\n")
+    export_bert_tfhub(bert_config, FLAGS.model_checkpoint_path,
+                      FLAGS.export_path, FLAGS.vocab_file,
+                      FLAGS.do_lower_case)
+  elif FLAGS.model_type == "squad":
+    export_bert_squad_tfhub(bert_config, FLAGS.model_checkpoint_path,
+                            FLAGS.export_path, FLAGS.vocab_file,
+                            FLAGS.do_lower_case)
+  else:
+    raise ValueError("Unsupported model_type %s." % FLAGS.model_type)


 if __name__ == "__main__":
 ...
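The key change above is writing the encoder into the checkpoint under both the new `model` and the legacy `encoder` dependency names, so either checkpoint layout restores cleanly. A runnable sketch of that pattern with a toy layer standing in for the BERT encoder (the path and names are illustrative):

import tensorflow as tf

# Toy stand-in for the BERT encoder.
encoder = tf.keras.layers.Dense(4)
encoder.build(input_shape=(None, 4))

# Register the same object under two dependency names, as the diff does, so
# checkpoints saved with either key layout can be restored later.
ckpt = tf.train.Checkpoint(model=encoder, encoder=encoder)
path = ckpt.save("/tmp/dual_key_ckpt")  # placeholder path

# A restorer that only knows the older `model` key still matches everything.
legacy = tf.train.Checkpoint(model=tf.keras.layers.Dense(4))
legacy.model.build(input_shape=(None, 4))
legacy.restore(path).assert_existing_objects_matched()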
official/nlp/bert/export_tfhub_test.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,26 +11,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests official.nlp.bert.export_tfhub."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+"""Tests official.nlp.bert.export_tfhub."""
 import os

+from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
 import tensorflow_hub as hub

 from official.nlp.bert import configs
 from official.nlp.bert import export_tfhub


-class ExportTfhubTest(tf.test.TestCase):
+class ExportTfhubTest(tf.test.TestCase, parameterized.TestCase):

-  def test_export_tfhub(self):
+  @parameterized.parameters("model", "encoder")
+  def test_export_tfhub(self, ckpt_key_name):
     # Exports a savedmodel for TF-Hub
     hidden_size = 16
     bert_config = configs.BertConfig(
 ...
@@ -42,7 +40,7 @@ class ExportTfhubTest(tf.test.TestCase):
         num_hidden_layers=1)
     bert_model, encoder = export_tfhub.create_bert_model(bert_config)
     model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")
-    checkpoint = tf.train.Checkpoint(model=encoder)
+    checkpoint = tf.train.Checkpoint(**{ckpt_key_name: encoder})
     checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
     model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir)
 ...
@@ -91,6 +89,7 @@ class ExportTfhubTest(tf.test.TestCase):
       outputs = np.concatenate(
           [hub_layer(inputs, training=training)[0] for _ in range(num_runs)])
       return np.mean(np.std(outputs, axis=0))

     self.assertLess(_dropout_mean_stddev(training=False), 1e-6)
     self.assertGreater(_dropout_mean_stddev(training=True), 1e-3)
 ...
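The test now runs once per checkpoint key name via absl's parameterized test support. A self-contained sketch of the same pattern, using a single variable instead of the BERT encoder:

import tensorflow as tf
from absl.testing import parameterized


class CheckpointKeyTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters("model", "encoder")
  def test_roundtrip(self, ckpt_key_name):
    v = tf.Variable(1.0)
    # Build the checkpoint with a dynamic dependency name, as in the diff.
    ckpt = tf.train.Checkpoint(**{ckpt_key_name: v})
    path = ckpt.save(os.path.join(self.get_temp_dir(), "ckpt"))
    v.assign(0.0)
    ckpt.restore(path).assert_existing_objects_matched()
    self.assertEqual(self.evaluate(v), 1.0)


import os  # needed by the sketch above

if __name__ == "__main__":
  tf.test.main()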
official/nlp/bert/input_pipeline.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,12 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""BERT model input pipelines."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+"""BERT model input pipelines."""
 import tensorflow as tf
 ...
@@ -36,11 +32,13 @@ def decode_record(record, name_to_features):
   return example


-def single_file_dataset(input_file, name_to_features):
+def single_file_dataset(input_file, name_to_features, num_samples=None):
   """Creates a single-file dataset to be passed for BERT custom training."""
   # For training, we want a lot of parallel reading and shuffling.
   # For eval, we want no shuffling and parallel reading doesn't matter.
   d = tf.data.TFRecordDataset(input_file)
+  if num_samples:
+    d = d.take(num_samples)
   d = d.map(
       lambda record: decode_record(record, name_to_features),
       num_parallel_calls=tf.data.experimental.AUTOTUNE)
 ...
@@ -156,7 +154,8 @@ def create_classifier_dataset(file_path,
                               is_training=True,
                               input_pipeline_context=None,
                               label_type=tf.int64,
-                              include_sample_weights=False):
+                              include_sample_weights=False,
+                              num_samples=None):
   """Creates input dataset from (tf)records files for train/eval."""
   name_to_features = {
       'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
 ...
@@ -166,7 +165,8 @@ def create_classifier_dataset(file_path,
   }
   if include_sample_weights:
     name_to_features['weight'] = tf.io.FixedLenFeature([], tf.float32)
-  dataset = single_file_dataset(file_path, name_to_features)
+  dataset = single_file_dataset(file_path, name_to_features,
+                                num_samples=num_samples)
   # The dataset is always sharded by number of hosts.
   # num_input_pipelines is the number of hosts rather than number of cores.
 ...
@@ -258,7 +258,7 @@ def create_retrieval_dataset(file_path,
       'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
       'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),
       'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
-      'int_iden': tf.io.FixedLenFeature([1], tf.int64),
+      'example_id': tf.io.FixedLenFeature([1], tf.int64),
   }
   dataset = single_file_dataset(file_path, name_to_features)
 ...
@@ -274,12 +274,29 @@ def create_retrieval_dataset(file_path,
         'input_mask': record['input_mask'],
         'input_type_ids': record['segment_ids']
     }
-    y = record['int_iden']
+    y = record['example_id']
     return (x, y)

   dataset = dataset.map(
       _select_data_from_record,
       num_parallel_calls=tf.data.experimental.AUTOTUNE)
   dataset = dataset.batch(batch_size, drop_remainder=False)
+
+  def _pad_to_batch(x, y):
+    cur_size = tf.shape(y)[0]
+    pad_size = batch_size - cur_size
+    pad_ids = tf.zeros(shape=[pad_size, seq_length], dtype=tf.int32)
+    for key in ('input_word_ids', 'input_mask', 'input_type_ids'):
+      x[key] = tf.concat([x[key], pad_ids], axis=0)
+    pad_labels = -tf.ones(shape=[pad_size, 1], dtype=tf.int32)
+    y = tf.concat([y, pad_labels], axis=0)
+    return x, y
+
+  dataset = dataset.map(
+      _pad_to_batch, num_parallel_calls=tf.data.experimental.AUTOTUNE)
   dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
   return dataset
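The new `num_samples` argument is just a `Dataset.take()` applied before decoding, and `_pad_to_batch` zero-pads the final short batch so every batch has exactly `batch_size` rows. Both behaviors in a toy, in-memory sketch (no TFRecords involved):

import tensorflow as tf

batch_size = 4
seq_length = 3

# Stand-in for the decoded record stream: 10 "examples" of shape [seq_length].
ds = tf.data.Dataset.range(10).map(
    lambda i: tf.fill([seq_length], tf.cast(i, tf.int32)))

num_samples = 6          # behaves like the new single_file_dataset argument
ds = ds.take(num_samples)
ds = ds.batch(batch_size, drop_remainder=False)

def _pad_to_batch(x):
  # Mirrors the retrieval pipeline's padding: fill the last short batch with
  # zeros so every batch has exactly batch_size rows.
  pad_size = batch_size - tf.shape(x)[0]
  pad = tf.zeros([pad_size, seq_length], dtype=x.dtype)
  return tf.concat([x, pad], axis=0)

ds = ds.map(_pad_to_batch)
for batch in ds:
  print(batch.shape)  # (4, 3) both times; the second batch is zero-padded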
official/nlp/bert/model_saving_utils.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,13 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utilities to save models."""
-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import google_type_annotations
-from __future__ import print_function
+"""Utilities to save models."""
 import os
 ...
@@ -32,19 +27,19 @@ def export_bert_model(model_export_path: typing.Text,
                       restore_model_using_load_weights: bool = False) -> None:
   """Export BERT model for serving which does not include the optimizer.

-  Arguments:
+  Args:
     model_export_path: Path to which exported model will be saved.
     model: Keras model object to export.
     checkpoint_dir: Path from which model weights will be loaded, if
       specified.
     restore_model_using_load_weights: Whether to use checkpoint.restore() API
       for custom checkpoint or to use model.load_weights() API. There are 2
       different ways to save checkpoints. One is using tf.train.Checkpoint and
       another is using Keras model.save_weights(). Custom training loop
       implementation uses tf.train.Checkpoint API and Keras ModelCheckpoint
       callback internally uses model.save_weights() API. Since these two APIs
       cannot be used together, model loading logic must take into account
       how the model checkpoint was saved.

   Raises:
     ValueError when either model_export_path or model is not specified.
 ...
@@ -55,14 +50,10 @@ def export_bert_model(model_export_path: typing.Text,
     raise ValueError('model must be a tf.keras.Model object.')

   if checkpoint_dir:
     # Keras compile/fit() was used to save checkpoint using
     # model.save_weights().
     if restore_model_using_load_weights:
       model_weight_path = os.path.join(checkpoint_dir, 'checkpoint')
       assert tf.io.gfile.exists(model_weight_path)
       model.load_weights(model_weight_path)
     # tf.train.Checkpoint API was used via custom training loop logic.
     else:
       checkpoint = tf.train.Checkpoint(model=model)
 ...
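The docstring above distinguishes two save paths that cannot be mixed: Keras `model.save_weights()` (what `ModelCheckpoint` uses) versus object-based `tf.train.Checkpoint` (what the custom training loop uses). A minimal sketch of both round-trips on a toy model; the directory is a placeholder:

import os
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])
export_dir = "/tmp/export_bert_demo"  # placeholder path
os.makedirs(export_dir, exist_ok=True)

# Path 1: Keras-style weights file, restored with model.load_weights().
weights_path = os.path.join(export_dir, "checkpoint")
model.save_weights(weights_path)
model.load_weights(weights_path)

# Path 2: object-based checkpoint, restored with checkpoint.restore().
ckpt = tf.train.Checkpoint(model=model)
latest = ckpt.save(os.path.join(export_dir, "ctl_ckpt"))
ckpt.restore(latest).assert_existing_objects_matched()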
official/nlp/bert/model_training_utils.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,12 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""A lightweight utility to train NLP models."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+"""A lightweight utility to train NLP models."""
 import json
 import os
 ...
@@ -25,8 +21,8 @@ import tempfile
 from absl import logging
 import tensorflow as tf
+from tensorflow.python.util import deprecation
+from official.common import distribute_utils
 from official.staging.training import grad_utils
-from official.utils.misc import distribution_utils

 _SUMMARY_TXT = 'training_summary.txt'
 _MIN_SUMMARY_STEPS = 10
 ...
@@ -65,8 +61,7 @@ def _get_input_iterator(input_fn, strategy):
   # pass callable that returns a dataset.
   if not callable(input_fn):
     raise ValueError('`input_fn` should be a closure that returns a dataset.')
-  iterator = iter(
-      strategy.experimental_distribute_datasets_from_function(input_fn))
+  iterator = iter(strategy.distribute_datasets_from_function(input_fn))
   return iterator
 ...
@@ -75,6 +70,13 @@ def _float_metric_value(metric):
   return metric.result().numpy().astype(float)


+def clip_by_global_norm_callback(grads_and_vars):
+  """Performs gradient clipping."""
+  grads, variables = zip(*grads_and_vars)
+  (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
+  return zip(clipped_grads, variables)
+
+
 def steps_to_run(current_step, steps_per_epoch, steps_per_loop):
   """Calculates steps to run on device."""
   if steps_per_loop <= 0:
 ...
@@ -126,10 +128,11 @@ def run_customized_training_loop(
     explicit_allreduce=False,
     pre_allreduce_callbacks=None,
     post_allreduce_callbacks=None,
-    train_summary_interval=0):
+    train_summary_interval=0,
+    allreduce_bytes_per_pack=0):
   """Run BERT pretrain model training using low-level API.

-  Arguments:
+  Args:
     _sentinel: Used to prevent positional parameters. Internal, do not use.
     strategy: Distribution strategy on which to run low level training loop.
     model_fn: Function that returns a tuple (model, sub_model). Caller of this
 ...
@@ -156,16 +159,16 @@ def run_customized_training_loop(
       evaluation is skipped.
     eval_steps: Number of steps to run evaluation. Required if `eval_input_fn`
       is not none.
-    metric_fn: A metrics function that returns a Keras Metric object to record
-      evaluation result using evaluation dataset or with training dataset
-      after every epoch.
+    metric_fn: A metrics function that returns either a Keras Metric object or
+      a list of Keras Metric objects to record evaluation result using
+      evaluation dataset or with training dataset after every epoch.
     init_checkpoint: Optional checkpoint to load to `sub_model` returned by
       `model_fn`.
     custom_callbacks: A list of Keras Callbacks objects to run during
       training. More specifically, `on_train_begin(), on_train_end(),
       on_batch_begin()`, `on_batch_end()`, `on_epoch_begin()`,
       `on_epoch_end()` methods are invoked during training. Note that some
       metrics may be missing from `logs`.
     run_eagerly: Whether to run model training in pure eager execution. This
       should be disabled for TPUStrategy.
     sub_model_export_name: If not None, will export `sub_model` returned by
 ...
@@ -194,6 +197,11 @@ def run_customized_training_loop(
       when explicit_allreduce=True.
     train_summary_interval: Step interval for training summaries. If the value
       is a negative number, then training summaries are not enabled.
+    allreduce_bytes_per_pack: A non-negative integer. Breaks collective
+      operations into packs of certain size. If it's zero, all gradients are
+      in one pack. Breaking gradient into packs could enable overlap between
+      allreduce and backprop computation. This flag only takes effect when
+      explicit_allreduce is set to True.

   Returns:
     Trained model.
 ...
@@ -237,7 +245,9 @@ def run_customized_training_loop(
   assert tf.executing_eagerly()

   if run_eagerly:
-    if isinstance(strategy, tf.distribute.experimental.TPUStrategy):
+    if isinstance(
+        strategy,
+        (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)):
       raise ValueError(
           'TPUStrategy should not run eagerly as it heavily relies on graph'
           ' optimization for the distributed system.')
 ...
@@ -253,7 +263,7 @@ def run_customized_training_loop(
   train_iterator = _get_input_iterator(train_input_fn, strategy)
   eval_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)

-  with distribution_utils.get_strategy_scope(strategy):
+  with distribute_utils.get_strategy_scope(strategy):
     # To correctly place the model weights on accelerators,
     # model and optimizer should be created in scope.
     model, sub_model = model_fn()
 ...
@@ -273,12 +283,14 @@ def run_customized_training_loop(
       logging.info(
           'Checkpoint file %s found and restoring from '
           'initial checkpoint for core model.', init_checkpoint)
-      checkpoint = tf.train.Checkpoint(model=sub_model)
-      checkpoint.restore(init_checkpoint).assert_existing_objects_matched()
+      checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model)
+      checkpoint.read(init_checkpoint).assert_existing_objects_matched()
       logging.info('Loading from checkpoint file completed')

     train_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)
-    eval_metrics = [metric_fn()] if metric_fn else []
+    eval_metrics = metric_fn() if metric_fn else []
+    if not isinstance(eval_metrics, list):
+      eval_metrics = [eval_metrics]
     # If evaluation is required, make a copy of metric as it will be used by
     # both train and evaluation.
     train_metrics = [
 ...
@@ -325,10 +337,10 @@ def run_customized_training_loop(
         grad_utils.minimize_using_explicit_allreduce(
             tape, optimizer, loss, training_vars, pre_allreduce_callbacks,
-            post_allreduce_callbacks)
+            post_allreduce_callbacks, allreduce_bytes_per_pack)
       else:
-        if isinstance(optimizer,
-                      tf.keras.mixed_precision.experimental.LossScaleOptimizer):
+        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
           with tape:
             scaled_loss = optimizer.get_scaled_loss(loss)
           scaled_grads = tape.gradient(scaled_loss, training_vars)
 ...
@@ -458,8 +470,7 @@ def run_customized_training_loop(
     callback_list.on_train_begin()
     while current_step < total_training_steps and not model.stop_training:
       if current_step % steps_per_epoch == 0:
-        callback_list.on_epoch_begin(
-            int(current_step / steps_per_epoch) + 1)
+        callback_list.on_epoch_begin(int(current_step / steps_per_epoch) + 1)

       # Training loss/metric are taking average over steps inside micro
       # training loop. We reset their values before each round.
 ...
@@ -524,13 +535,14 @@ def run_customized_training_loop(
         _save_checkpoint(strategy, checkpoint, model_dir,
                          checkpoint_name.format(step=current_step))
       if eval_input_fn:
-        logging.info('Running evaluation after step: %s.', current_step)
-        logs = _run_evaluation(current_step,
-                               _get_input_iterator(eval_input_fn, strategy))
         # Re-initialize evaluation metric.
         eval_loss_metric.reset_states()
         for metric in eval_metrics + model.metrics:
           metric.reset_states()
+        logging.info('Running evaluation after step: %s.', current_step)
+        logs = _run_evaluation(current_step,
+                               _get_input_iterator(eval_input_fn, strategy))
       # We add train_loss here rather than call on_batch_end twice to make
       # sure that no duplicated values are generated.
       logs['loss'] = train_loss
 ...
@@ -548,6 +560,11 @@ def run_customized_training_loop(
     _save_checkpoint(strategy, checkpoint, model_dir,
                      checkpoint_name.format(step=current_step))
     if eval_input_fn:
+      # Re-initialize evaluation metric.
+      eval_loss_metric.reset_states()
+      for metric in eval_metrics + model.metrics:
+        metric.reset_states()
       logging.info('Running final evaluation after training is complete.')
       logs = _run_evaluation(current_step,
                              _get_input_iterator(eval_input_fn, strategy))
 ...
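`clip_by_global_norm_callback` now lives in `model_training_utils` so both the pretraining and SQuAD runners can share it as a pre-allreduce hook. Its contract is simple: take `(grad, var)` pairs, jointly clip the gradients to global norm 1.0, and return the pairs re-zipped. A standalone check of that math:

import tensorflow as tf

def clip_by_global_norm_callback(grads_and_vars):
  """Same shape as the helper added in the diff: clip, keep the pairing."""
  grads, variables = zip(*grads_and_vars)
  (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
  return zip(clipped_grads, variables)

v1 = tf.Variable([3.0, 4.0])
v2 = tf.Variable([0.0])
grads_and_vars = [(tf.constant([3.0, 4.0]), v1), (tf.constant([0.0]), v2)]

clipped = list(clip_by_global_norm_callback(grads_and_vars))
# The global norm was 5.0, so every gradient is scaled by 1/5.
print(clipped[0][0].numpy())  # [0.6 0.8]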
official/nlp/bert/model_training_utils_test.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,16 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for official.modeling.training.model_training_utils."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+"""Tests for official.modeling.training.model_training_utils."""
 import os

 from absl import logging
+from absl.testing import flagsaver
 from absl.testing import parameterized
 from absl.testing.absltest import mock
 import numpy as np
 ...
@@ -28,20 +25,22 @@ import tensorflow as tf
 from tensorflow.python.distribute import combinations
 from tensorflow.python.distribute import strategy_combinations
+from official.nlp.bert import common_flags
 from official.nlp.bert import model_training_utils

+common_flags.define_common_bert_flags()


 def eager_strategy_combinations():
   return combinations.combine(
       distribution=[
           strategy_combinations.default_strategy,
-          strategy_combinations.tpu_strategy,
+          strategy_combinations.cloud_tpu_strategy,
           strategy_combinations.one_device_strategy_gpu,
           strategy_combinations.mirrored_strategy_with_gpu_and_cpu,
           strategy_combinations.mirrored_strategy_with_two_gpus,
-      ],
-      mode='eager',
-  )
+      ],)


 def eager_gpu_strategy_combinations():
 ...
@@ -51,9 +50,7 @@ def eager_gpu_strategy_combinations():
           strategy_combinations.one_device_strategy_gpu,
           strategy_combinations.mirrored_strategy_with_gpu_and_cpu,
           strategy_combinations.mirrored_strategy_with_two_gpus,
-      ],
-      mode='eager',
-  )
+      ],)


 def create_fake_data_input_fn(batch_size, features_shape, num_classes):
 ...
@@ -106,9 +103,8 @@ def create_model_fn(input_shape, num_classes, use_float16=False):
         tf.reduce_mean(input_layer), name='mean_input', aggregation='mean')
     model.optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
     if use_float16:
-      model.optimizer = (
-          tf.keras.mixed_precision.experimental.LossScaleOptimizer(
-              model.optimizer, loss_scale='dynamic'))
+      model.optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
+          model.optimizer)
     return model, sub_model

   return _model_fn
 ...
@@ -139,9 +135,9 @@ class RecordingCallback(tf.keras.callbacks.Callback):
   def __init__(self):
     self.batch_begin = []  # (batch, logs)
     self.batch_end = []  # (batch, logs)
     self.epoch_begin = []  # (epoch, logs)
     self.epoch_end = []  # (epoch, logs)

   def on_batch_begin(self, batch, logs=None):
     self.batch_begin.append((batch, logs))
 ...
@@ -162,6 +158,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):
     super(ModelTrainingUtilsTest, self).setUp()
     self._model_fn = create_model_fn(input_shape=[128], num_classes=3)

+  @flagsaver.flagsaver
   def run_training(self, strategy, model_dir, steps_per_loop, run_eagerly):
     input_fn = create_fake_data_input_fn(
         batch_size=8, features_shape=[128], num_classes=3)
 ...
@@ -184,8 +181,10 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):
   @combinations.generate(eager_strategy_combinations())
   def test_train_eager_single_step(self, distribution):
-    model_dir = self.get_temp_dir()
-    if isinstance(distribution, tf.distribute.experimental.TPUStrategy):
+    model_dir = self.create_tempdir().full_path
+    if isinstance(
+        distribution,
+        (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)):
       with self.assertRaises(ValueError):
         self.run_training(
             distribution, model_dir, steps_per_loop=1, run_eagerly=True)
 ...
@@ -195,9 +194,8 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):
   @combinations.generate(eager_gpu_strategy_combinations())
   def test_train_eager_mixed_precision(self, distribution):
-    model_dir = self.get_temp_dir()
-    policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
-    tf.keras.mixed_precision.experimental.set_policy(policy)
+    model_dir = self.create_tempdir().full_path
+    tf.keras.mixed_precision.set_global_policy('mixed_float16')
     self._model_fn = create_model_fn(
         input_shape=[128], num_classes=3, use_float16=True)
     self.run_training(
 ...
@@ -205,24 +203,26 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):
   @combinations.generate(eager_strategy_combinations())
   def test_train_check_artifacts(self, distribution):
-    model_dir = self.get_temp_dir()
+    model_dir = self.create_tempdir().full_path
     self.run_training(
         distribution, model_dir, steps_per_loop=10, run_eagerly=False)

     # Two checkpoints should be saved after two epochs.
     files = map(os.path.basename,
                 tf.io.gfile.glob(os.path.join(model_dir, 'ctl_step_*index')))
     self.assertCountEqual(
         ['ctl_step_20.ckpt-1.index', 'ctl_step_40.ckpt-2.index'], files)

     # Three submodel checkpoints should be saved after two epochs (one after
     # each epoch plus one final).
     files = map(
         os.path.basename,
         tf.io.gfile.glob(os.path.join(model_dir, 'my_submodel_name*index')))
     self.assertCountEqual([
         'my_submodel_name.ckpt-3.index',
         'my_submodel_name_step_20.ckpt-1.index',
         'my_submodel_name_step_40.ckpt-2.index'
     ], files)
     self.assertNotEmpty(
         tf.io.gfile.glob(
 ...
@@ -247,7 +247,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):
   @combinations.generate(eager_strategy_combinations())
   def test_train_check_callbacks(self, distribution):
-    model_dir = self.get_temp_dir()
+    model_dir = self.create_tempdir().full_path
     callback = RecordingCallback()
     callbacks = [callback]
     input_fn = create_fake_data_input_fn(
 ...
@@ -286,9 +286,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):
       combinations.combine(
           distribution=[
               strategy_combinations.one_device_strategy_gpu,
-          ],
-          mode='eager',
-      ))
+          ],))
   def test_train_check_artifacts_non_chief(self, distribution):
     # We shouldn't export artifacts on non-chief workers. Since there's no easy
     # way to test with real MultiWorkerMirroredStrategy, we patch the strategy
 ...
@@ -298,7 +296,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase):
                            new_callable=mock.PropertyMock,
                            return_value=False), \
         mock.patch.object(extended.__class__, 'should_save_summary',
                           new_callable=mock.PropertyMock,
                           return_value=False):
-      model_dir = self.get_temp_dir()
+      model_dir = self.create_tempdir().full_path
       self.run_training(
           distribution, model_dir, steps_per_loop=10, run_eagerly=False)
       self.assertEmpty(tf.io.gfile.listdir(model_dir))
 ...
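These test changes track the promotion of `tf.keras.mixed_precision` out of `experimental` in TF 2.4: `set_global_policy` replaces `set_policy`, and `LossScaleOptimizer` defaults to dynamic scaling with no `loss_scale='dynamic'` argument. A quick sketch of the stable API:

import tensorflow as tf

# Stable replacements for the experimental mixed-precision calls.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
# Dynamic loss scaling is now the default; no loss_scale argument needed.
opt = tf.keras.mixed_precision.LossScaleOptimizer(opt)

print(tf.keras.mixed_precision.global_policy().name)  # mixed_float16
print(opt.dynamic)  # True

# Reset so later code is unaffected.
tf.keras.mixed_precision.set_global_policy('float32')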
official/nlp/bert/run_classifier.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,22 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """BERT classification or regression finetuning runner in TF 2.x."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import functools
 import json
 import math
 import os

+# Import libraries
 from absl import app
 from absl import flags
 from absl import logging
 import gin
 import tensorflow as tf
+from official.common import distribute_utils
 from official.modeling import performance
 from official.nlp import optimization
 from official.nlp.bert import bert_models
 ...
@@ -34,7 +33,6 @@ from official.nlp.bert import common_flags
 from official.nlp.bert import configs as bert_configs
 from official.nlp.bert import input_pipeline
 from official.nlp.bert import model_saving_utils
-from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils

 flags.DEFINE_enum(
 ...
@@ -52,6 +50,9 @@ flags.DEFINE_string(
     'input_meta_data_path', None,
     'Path to file that contains meta data about input '
     'to be used for training and evaluation.')
+flags.DEFINE_integer('train_data_size', None, 'Number of training samples '
+                     'to use. If None, uses the full train data. '
+                     '(default: None).')
 flags.DEFINE_string('predict_checkpoint_path', None,
                     'Path to the checkpoint for predictions.')
 flags.DEFINE_integer(
 ...
@@ -91,7 +92,8 @@ def get_dataset_fn(input_file_pattern,
                    global_batch_size,
                    is_training,
                    label_type=tf.int64,
-                   include_sample_weights=False):
+                   include_sample_weights=False,
+                   num_samples=None):
   """Gets a closure to create a dataset."""

   def _dataset_fn(ctx=None):
 ...
@@ -105,7 +107,8 @@ def get_dataset_fn(input_file_pattern,
         is_training=is_training,
         input_pipeline_context=ctx,
         label_type=label_type,
-        include_sample_weights=include_sample_weights)
+        include_sample_weights=include_sample_weights,
+        num_samples=num_samples)
     return dataset

   return _dataset_fn
 ...
@@ -216,8 +219,8 @@ def run_keras_compile_fit(model_dir,
     optimizer = bert_model.optimizer

     if init_checkpoint:
-      checkpoint = tf.train.Checkpoint(model=sub_model)
-      checkpoint.restore(init_checkpoint).assert_existing_objects_matched()
+      checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model)
+      checkpoint.read(init_checkpoint).assert_existing_objects_matched()

     if not isinstance(metric_fn, (list, tuple)):
       metric_fn = [metric_fn]
 ...
@@ -225,7 +228,7 @@ def run_keras_compile_fit(model_dir,
         optimizer=optimizer,
         loss=loss_fn,
         metrics=[fn() for fn in metric_fn],
-        experimental_steps_per_execution=steps_per_loop)
+        steps_per_execution=steps_per_loop)

     summary_dir = os.path.join(model_dir, 'summaries')
     summary_callback = tf.keras.callbacks.TensorBoard(summary_dir)
 ...
@@ -262,6 +265,7 @@ def run_keras_compile_fit(model_dir,
 def get_predictions_and_labels(strategy,
                                trained_model,
                                eval_input_fn,
+                               is_regression=False,
                                return_probs=False):
   """Obtains predictions of trained model on evaluation data.
 ...
@@ -272,6 +276,7 @@ def get_predictions_and_labels(strategy,
     strategy: Distribution strategy.
     trained_model: Trained model with preloaded weights.
     eval_input_fn: Input function for evaluation data.
+    is_regression: Whether it is a regression task.
     return_probs: Whether to return probabilities of classes.

   Returns:
 ...
@@ -287,8 +292,11 @@ def get_predictions_and_labels(strategy,
       """Replicated predictions."""
       inputs, labels = inputs
       logits = trained_model(inputs, training=False)
-      probabilities = tf.nn.softmax(logits)
-      return probabilities, labels
+      if not is_regression:
+        probabilities = tf.nn.softmax(logits)
+        return probabilities, labels
+      else:
+        return logits, labels

     outputs, labels = strategy.run(_test_step_fn, args=(next(iterator),))
     # outputs: current batch logits as a tuple of shard logits
 ...
@@ -314,8 +322,7 @@ def get_predictions_and_labels(strategy,
         tf.experimental.async_clear_error()
     return preds, golds

-  test_iter = iter(
-      strategy.experimental_distribute_datasets_from_function(eval_input_fn))
+  test_iter = iter(strategy.distribute_datasets_from_function(eval_input_fn))
   predictions, labels = _run_evaluation(test_iter)

   return predictions, labels
 ...
@@ -341,9 +348,12 @@ def export_classifier(model_export_path, input_meta_data, bert_config,
     raise ValueError('Export path is not specified: %s' % model_dir)

   # Export uses float32 for now, even if training uses mixed precision.
-  tf.keras.mixed_precision.experimental.set_policy('float32')
+  tf.keras.mixed_precision.set_global_policy('float32')
   classifier_model = bert_models.classifier_model(
-      bert_config, input_meta_data.get('num_labels', 1))[0]
+      bert_config,
+      input_meta_data.get('num_labels', 1),
+      hub_module_url=FLAGS.hub_module_url,
+      hub_module_trainable=False)[0]

   model_saving_utils.export_bert_model(
       model_export_path, model=classifier_model, checkpoint_dir=model_dir)
 ...
@@ -365,6 +375,9 @@ def run_bert(strategy,
   epochs = FLAGS.num_train_epochs * FLAGS.num_eval_per_epoch
   train_data_size = (
       input_meta_data['train_data_size'] // FLAGS.num_eval_per_epoch)
+  if FLAGS.train_data_size:
+    train_data_size = min(train_data_size, FLAGS.train_data_size)
+    logging.info('Updated train_data_size: %s', train_data_size)
   steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
   warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
   eval_steps = int(
 ...
@@ -430,7 +443,7 @@ def custom_main(custom_callbacks=None, custom_metrics=None):
                  FLAGS.model_dir)
     return

-  strategy = distribution_utils.get_distribution_strategy(
+  strategy = distribute_utils.get_distribution_strategy(
       distribution_strategy=FLAGS.distribution_strategy,
       num_gpus=FLAGS.num_gpus,
       tpu_address=FLAGS.tpu)
 ...
@@ -443,9 +456,10 @@ def custom_main(custom_callbacks=None, custom_metrics=None):
       include_sample_weights=include_sample_weights)

   if FLAGS.mode == 'predict':
+    num_labels = input_meta_data.get('num_labels', 1)
     with strategy.scope():
       classifier_model = bert_models.classifier_model(
-          bert_config, input_meta_data['num_labels'])[0]
+          bert_config, num_labels)[0]
       checkpoint = tf.train.Checkpoint(model=classifier_model)
       latest_checkpoint_file = (
           FLAGS.predict_checkpoint_path or
 ...
@@ -456,7 +470,11 @@ def custom_main(custom_callbacks=None, custom_metrics=None):
       checkpoint.restore(
           latest_checkpoint_file).assert_existing_objects_matched()
       preds, _ = get_predictions_and_labels(
-          strategy, classifier_model, eval_input_fn, return_probs=True)
+          strategy,
+          classifier_model,
+          eval_input_fn,
+          is_regression=(num_labels == 1),
+          return_probs=True)
     output_predict_file = os.path.join(FLAGS.model_dir, 'test_results.tsv')
     with tf.io.gfile.GFile(output_predict_file, 'w') as writer:
       logging.info('***** Predict results *****')
 ...
@@ -475,7 +493,8 @@ def custom_main(custom_callbacks=None, custom_metrics=None):
       FLAGS.train_batch_size,
       is_training=True,
       label_type=label_type,
-      include_sample_weights=include_sample_weights)
+      include_sample_weights=include_sample_weights,
+      num_samples=FLAGS.train_data_size)
   run_bert(
       strategy,
       input_meta_data,
 ...
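Several call sites above switch from `experimental_distribute_datasets_from_function` to the stable `distribute_datasets_from_function` (graduated in TF 2.4). A minimal sketch of building a distributed iterator that way, under the default single-device strategy:

import tensorflow as tf

strategy = tf.distribute.get_strategy()  # default (no-op) strategy

def dataset_fn(input_context=None):
  # Stand-in for the classifier input pipeline; a real dataset_fn would use
  # input_context.num_input_pipelines to shard across hosts.
  del input_context
  return tf.data.Dataset.range(8).batch(4)

# The stable spelling used by the diff; the experimental_ prefix is gone.
iterator = iter(strategy.distribute_datasets_from_function(dataset_fn))
print(next(iterator))  # first per-replica batch: [0 1 2 3]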
official/nlp/bert/run_pretraining.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,17 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Run masked LM/next sentence pre-training for BERT in TF 2.x."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function

+# Import libraries
 from absl import app
 from absl import flags
 from absl import logging
 import gin
 import tensorflow as tf
+from official.common import distribute_utils
 from official.modeling import performance
 from official.nlp import optimization
 from official.nlp.bert import bert_models
 ...
@@ -29,7 +28,6 @@ from official.nlp.bert import common_flags
 from official.nlp.bert import configs
 from official.nlp.bert import input_pipeline
 from official.nlp.bert import model_training_utils
-from official.utils.misc import distribution_utils

 flags.DEFINE_string('input_files', None,
 ...
@@ -105,7 +103,11 @@ def run_customized_training(strategy,
                             train_batch_size,
                             use_next_sentence_label=True,
                             train_summary_interval=0,
-                            custom_callbacks=None):
+                            custom_callbacks=None,
+                            explicit_allreduce=False,
+                            pre_allreduce_callbacks=None,
+                            post_allreduce_callbacks=None,
+                            allreduce_bytes_per_pack=0):
   """Run BERT pretrain model training using low-level API."""

   train_input_fn = get_pretrain_dataset_fn(input_files, max_seq_length,
 ...
@@ -139,6 +141,10 @@ def run_customized_training(strategy,
       steps_per_loop=steps_per_loop,
       epochs=epochs,
       sub_model_export_name='pretrained/bert_model',
+      explicit_allreduce=explicit_allreduce,
+      pre_allreduce_callbacks=pre_allreduce_callbacks,
+      post_allreduce_callbacks=post_allreduce_callbacks,
+      allreduce_bytes_per_pack=allreduce_bytes_per_pack,
      train_summary_interval=train_summary_interval,
      custom_callbacks=custom_callbacks)
 ...
@@ -158,6 +164,12 @@ def run_bert_pretrain(strategy, custom_callbacks=None):
     performance.set_mixed_precision_policy(common_flags.dtype())

+  # Only when explicit_allreduce = True, post_allreduce_callbacks and
+  # allreduce_bytes_per_pack will take effect. optimizer.apply_gradients() no
+  # longer implicitly allreduce gradients, users manually allreduce gradient
+  # and pass the allreduced grads_and_vars to apply_gradients().
+  # With explicit_allreduce = True, clip_by_global_norm is moved to after
+  # allreduce.
   return run_customized_training(
       strategy,
       bert_config,
 ...
@@ -176,16 +188,25 @@ def run_bert_pretrain(strategy, custom_callbacks=None):
       FLAGS.train_batch_size,
       FLAGS.use_next_sentence_label,
       FLAGS.train_summary_interval,
-      custom_callbacks=custom_callbacks)
+      custom_callbacks=custom_callbacks,
+      explicit_allreduce=FLAGS.explicit_allreduce,
+      pre_allreduce_callbacks=[
+          model_training_utils.clip_by_global_norm_callback
+      ],
+      allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack)


 def main(_):
   gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param)
   if not FLAGS.model_dir:
     FLAGS.model_dir = '/tmp/bert20/'
-  strategy = distribution_utils.get_distribution_strategy(
+  # Configures cluster spec for multi-worker distribution strategy.
+  if FLAGS.num_gpus > 0:
+    _ = distribute_utils.configure_cluster(FLAGS.worker_hosts,
+                                           FLAGS.task_index)
+  strategy = distribute_utils.get_distribution_strategy(
       distribution_strategy=FLAGS.distribution_strategy,
       num_gpus=FLAGS.num_gpus,
       all_reduce_alg=FLAGS.all_reduce_alg,
       tpu_address=FLAGS.tpu)
   if strategy:
     print('***** Number of cores used : ', strategy.num_replicas_in_sync)
 ...
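main() now configures the cluster spec before building the strategy, mirroring run_squad.py. A hedged sketch of the call order; it assumes the TF Model Garden (`official`) package is importable, and the host list, task index, and strategy name are placeholders standing in for the FLAGS values:

from official.common import distribute_utils

# Placeholders for FLAGS.worker_hosts / FLAGS.task_index.
worker_hosts = 'host1:8888,host2:8888'
task_index = 0

# Writes a cluster spec so a multi-worker strategy can discover its peers
# (a sketch of the call order in main(), not new logic).
_ = distribute_utils.configure_cluster(worker_hosts, task_index)

strategy = distribute_utils.get_distribution_strategy(
    distribution_strategy='one_device',  # placeholder strategy name
    num_gpus=0,
    all_reduce_alg=None)
if strategy:
  print('***** Number of cores used : ', strategy.num_replicas_in_sync)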
official/nlp/bert/run_squad.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,28 +11,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Run BERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+"""Run BERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x."""
 import json
 import os
 import time

+# Import libraries
 from absl import app
 from absl import flags
 from absl import logging
 import gin
 import tensorflow as tf
+from official.common import distribute_utils
 from official.nlp.bert import configs as bert_configs
 from official.nlp.bert import run_squad_helper
 from official.nlp.bert import tokenization
 from official.nlp.data import squad_lib as squad_lib_wp
-from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils
 ...
@@ -104,9 +100,8 @@ def main(_):
   # Configures cluster spec for multi-worker distribution strategy.
   if FLAGS.num_gpus > 0:
-    _ = distribution_utils.configure_cluster(FLAGS.worker_hosts,
-                                             FLAGS.task_index)
-  strategy = distribution_utils.get_distribution_strategy(
+    _ = distribute_utils.configure_cluster(FLAGS.worker_hosts,
+                                           FLAGS.task_index)
+  strategy = distribute_utils.get_distribution_strategy(
       distribution_strategy=FLAGS.distribution_strategy,
       num_gpus=FLAGS.num_gpus,
       all_reduce_alg=FLAGS.all_reduce_alg,
 ...
official/nlp/bert/run_squad_helper.py  View file @ f16a7b5b

-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -11,15 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Library for running BERT family models on SQuAD 1.1/2.0 in TF 2.x."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import collections
 import json
 import os
 from absl import flags
 from absl import logging
 import tensorflow as tf
 ...
@@ -39,10 +37,10 @@ from official.utils.misc import keras_utils
 def define_common_squad_flags():
   """Defines common flags used by SQuAD tasks."""
   flags.DEFINE_enum(
       'mode', 'train_and_eval',
       ['train_and_eval', 'train_and_predict', 'train', 'eval', 'predict',
        'export_only'],
       'One of {"train_and_eval", "train_and_predict", '
       '"train", "eval", "predict", "export_only"}. '
       '`train_and_eval`: train & predict to json files & compute eval metrics. '
       '`train_and_predict`: train & predict to json files. '
 ...
@@ -60,12 +58,12 @@ def define_common_squad_flags():
   # Model training specific flags.
   flags.DEFINE_integer('train_batch_size', 32, 'Total batch size for training.')
   # Predict processing related.
   flags.DEFINE_string(
       'predict_file', None,
       'SQuAD prediction json file path. '
       '`predict` mode supports multiple files: one can use '
       'wildcard to specify multiple files and it can also be '
       'multiple file patterns separated by comma. Note that '
       '`eval` mode only supports a single predict file.')
   flags.DEFINE_bool(
       'do_lower_case', True,
       'Whether to lower case the input text. Should be True for uncased '
 ...
@@ -97,10 +95,7 @@ def define_common_squad_flags():
 FLAGS = flags.FLAGS


-def squad_loss_fn(start_positions,
-                  end_positions,
-                  start_logits,
-                  end_logits):
+def squad_loss_fn(start_positions, end_positions, start_logits, end_logits):
   """Returns sparse categorical crossentropy for start/end logits."""
   start_loss = tf.keras.losses.sparse_categorical_crossentropy(
       start_positions, start_logits, from_logits=True)
 ...
@@ -118,11 +113,8 @@ def get_loss_fn():
     start_positions = labels['start_positions']
     end_positions = labels['end_positions']
     start_logits, end_logits = model_outputs
-    return squad_loss_fn(
-        start_positions,
-        end_positions,
-        start_logits,
-        end_logits)
+    return squad_loss_fn(start_positions, end_positions, start_logits,
+                         end_logits)

   return _loss_fn
 ...
@@ -168,7 +160,7 @@ def get_squad_model_to_predict(strategy, bert_config, checkpoint_path,
   """Gets a squad model to make predictions."""
   with strategy.scope():
     # Prediction always uses float32, even if training uses mixed precision.
-    tf.keras.mixed_precision.experimental.set_policy('float32')
+    tf.keras.mixed_precision.set_global_policy('float32')
     squad_model, _ = bert_models.squad_model(
         bert_config,
         input_meta_data['max_seq_length'],
 ...
@@ -182,11 +174,8 @@ def get_squad_model_to_predict(strategy, bert_config, checkpoint_path,
   return squad_model


-def predict_squad_customized(strategy,
-                             input_meta_data,
-                             predict_tfrecord_path,
-                             num_steps,
-                             squad_model):
+def predict_squad_customized(strategy, input_meta_data, predict_tfrecord_path,
+                             num_steps, squad_model):
   """Make predictions using a Bert-based squad model."""
   predict_dataset_fn = get_dataset_fn(
       predict_tfrecord_path,
 ...
@@ -194,8 +183,7 @@ def predict_squad_customized(strategy,
       FLAGS.predict_batch_size,
       is_training=False)
   predict_iterator = iter(
-      strategy.experimental_distribute_datasets_from_function(
-          predict_dataset_fn))
+      strategy.distribute_datasets_from_function(predict_dataset_fn))

   @tf.function
   def predict_step(iterator):
 ...
@@ -259,8 +247,7 @@ def train_squad(strategy,
         hub_module_trainable=FLAGS.hub_module_trainable)
     optimizer = optimization.create_optimizer(FLAGS.learning_rate,
                                               steps_per_epoch * epochs,
-                                              warmup_steps,
-                                              FLAGS.end_lr,
+                                              warmup_steps, FLAGS.end_lr,
                                               FLAGS.optimizer_type)
     squad_model.optimizer = performance.configure_optimizer(
 ...
@@ -269,15 +256,12 @@ def train_squad(strategy,
         use_graph_rewrite=common_flags.use_graph_rewrite())
     return squad_model, core_model

-  # If explicit_allreduce = True, apply_gradients() no longer implicitly
-  # allreduce gradients, users manually allreduce gradient and pass the
-  # allreduced grads_and_vars to apply_gradients(). clip_by_global_norm will be
-  # applied to allreduced gradients.
-  def clip_by_global_norm_callback(grads_and_vars):
-    grads, variables = zip(*grads_and_vars)
-    (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
-    return zip(clipped_grads, variables)
+  # Only when explicit_allreduce = True, post_allreduce_callbacks and
+  # allreduce_bytes_per_pack will take effect. optimizer.apply_gradients() no
+  # longer implicitly allreduce gradients, users manually allreduce gradient
+  # and pass the allreduced grads_and_vars to apply_gradients().
+  # With explicit_allreduce = True, clip_by_global_norm is moved to after
+  # allreduce.
   model_training_utils.run_customized_training_loop(
       strategy=strategy,
       model_fn=_get_squad_model,
 ...
@@ -291,8 +275,11 @@ def train_squad(strategy,
       sub_model_export_name=sub_model_export_name,
       run_eagerly=run_eagerly,
       custom_callbacks=custom_callbacks,
-      explicit_allreduce=False,
-      post_allreduce_callbacks=[clip_by_global_norm_callback])
+      explicit_allreduce=FLAGS.explicit_allreduce,
+      pre_allreduce_callbacks=[
+          model_training_utils.clip_by_global_norm_callback
+      ],
+      allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack)


 def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib,
 ...
@@ -344,8 +331,9 @@ def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib,
   logging.info('  Batch size = %d', FLAGS.predict_batch_size)
   num_steps = int(dataset_size / FLAGS.predict_batch_size)
   all_results = predict_squad_customized(strategy, input_meta_data,
                                          eval_writer.filename, num_steps,
                                          squad_model)

   all_predictions, all_nbest_json, scores_diff_json = (
       squad_lib.postprocess_output(
 ...
@@ -362,8 +350,12 @@ def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib,
   return all_predictions, all_nbest_json, scores_diff_json


-def dump_to_files(all_predictions, all_nbest_json, scores_diff_json,
-                  squad_lib, version_2_with_negative, file_prefix=''):
+def dump_to_files(all_predictions,
+                  all_nbest_json,
+                  scores_diff_json,
+                  squad_lib,
+                  version_2_with_negative,
+                  file_prefix=''):
   """Save output to json files."""
   output_prediction_file = os.path.join(FLAGS.model_dir,
                                         '%spredictions.json' % file_prefix)
 ...
@@ -452,8 +444,7 @@ def eval_squad(strategy,
     dataset_json = json.load(reader)
     pred_dataset = dataset_json['data']
   if input_meta_data.get('version_2_with_negative', False):
-    eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset,
-                                                all_predictions,
+    eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset, all_predictions,
                                                 scores_diff_json)
   else:
     eval_metrics = squad_evaluate_v1_1.evaluate(pred_dataset, all_predictions)
 ...
@@ -474,7 +465,7 @@ def export_squad(model_export_path, input_meta_data, bert_config):
   if not model_export_path:
     raise ValueError('Export path is not specified: %s' % model_export_path)
   # Export uses float32 for now, even if training uses mixed precision.
-  tf.keras.mixed_precision.experimental.set_policy('float32')
+  tf.keras.mixed_precision.set_global_policy('float32')
   squad_model, _ = bert_models.squad_model(
       bert_config, input_meta_data['max_seq_length'])
   model_saving_utils.export_bert_model(
 ...
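With explicit allreduce, gradient post-processing hooks run either before or after the cross-replica allreduce; the shared `clip_by_global_norm_callback` is now passed as a pre-allreduce hook. A simplified, single-device sketch of how such hooks compose with `apply_gradients` (this stands in for `grad_utils`; it is not the Model Garden implementation):

import tensorflow as tf

def apply_callbacks(grads_and_vars, callbacks):
  # Run each hook over the (grad, var) pairs in order, as the training loop
  # does for its pre-/post-allreduce callbacks (simplified sketch).
  for cb in callbacks or []:
    grads_and_vars = list(cb(grads_and_vars))
  return grads_and_vars

def clip_by_global_norm_callback(grads_and_vars):
  grads, variables = zip(*grads_and_vars)
  clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
  return zip(clipped_grads, variables)

v = tf.Variable([0.0, 0.0])
gvs = [(tf.constant([6.0, 8.0]), v)]  # global norm 10, so scaled by 0.1
gvs = apply_callbacks(gvs, [clip_by_global_norm_callback])
tf.keras.optimizers.SGD(1.0).apply_gradients(gvs)
print(v.numpy())  # [-0.6 -0.8]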
official/nlp/bert/serving.py
View file @
f16a7b5b
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -12,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
 """Examples of SavedModel export for tf-serving."""

 from absl import app
...
...
@@ -22,11 +21,11 @@ import tensorflow as tf
from
official.nlp.bert
import
bert_models
from
official.nlp.bert
import
configs
flags
.
DEFINE_integer
(
"sequence_length"
,
None
,
"Sequence length to parse the tf.Example. If "
"sequence_length > 0, add a signature for serialized "
"tf.Example and define the parsing specification by the "
"sequence_length."
)
flags
.
DEFINE_integer
(
"sequence_length"
,
None
,
"Sequence length to parse the tf.Example. If "
"sequence_length > 0, add a signature for serialized "
"tf.Example and define the parsing specification by the "
"sequence_length."
)
flags
.
DEFINE_string
(
"bert_config_file"
,
None
,
"Bert configuration file to define core bert layers."
)
flags
.
DEFINE_string
(
"model_checkpoint_path"
,
None
,
...
...
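
As the sequence_length help string above explains, a positive value adds a serving signature that parses serialized tf.Example protos with a fixed-length spec. A sketch of what such a spec can look like, assuming the standard BERT feature names (input_ids / input_mask / segment_ids); serving.py's actual spec may differ:

    import tensorflow as tf

    def make_bert_feature_spec(sequence_length):
      # Fixed-length int64 features, one per standard BERT input tensor.
      return {
          'input_ids': tf.io.FixedLenFeature([sequence_length], tf.int64),
          'input_mask': tf.io.FixedLenFeature([sequence_length], tf.int64),
          'segment_ids': tf.io.FixedLenFeature([sequence_length], tf.int64),
      }

    # tf.io.parse_example(serialized_batch, make_bert_feature_spec(128))
    # yields dense [batch, 128] tensors ready to feed the model.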
official/nlp/bert/squad_evaluate_v1_1.py  View file @ f16a7b5b
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
...
...
@@ -10,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
 """Evaluation of SQuAD predictions (version 1.1).

 The functions are copied from
...
...
@@ -22,15 +23,12 @@ Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, Percy Liang
 https://nlp.stanford.edu/pubs/rajpurkar2016squad.pdf
 """

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import collections
 import re
 import string

 # pylint: disable=g-bad-import-order
 from absl import logging
 # pylint: enable=g-bad-import-order
...
...
official/nlp/bert/squad_evaluate_v2_0.py  View file @ f16a7b5b
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
...
...
@@ -10,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
 """Evaluation script for SQuAD version 2.0.

 The functions are copied and modified from
...
...
@@ -22,10 +23,6 @@ This file is expected to map question ID's to the model's predicted probability
 that a question is unanswerable.
 """

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import collections
 import re
 import string
...
...
official/nlp/bert/tf1_checkpoint_converter_lib.py  View file @ f16a7b5b
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -11,11 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
 r"""Convert checkpoints created by Estimator (tf1) to be Keras compatible."""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import numpy as np
 import tensorflow.compat.v1 as tf  # TF 1.x
...
...
@@ -53,6 +50,7 @@ BERT_V2_NAME_REPLACEMENTS = (
     ("output/dense", "output"),
     ("output/LayerNorm", "output_layer_norm"),
     ("pooler/dense", "pooler_transform"),
     ("cls/predictions", "bert/cls/predictions"),
+    ("cls/predictions/output_bias", "cls/predictions/output_bias/bias"),
     ("cls/seq_relationship/output_bias", "predictions/transform/logits/bias"),
     ("cls/seq_relationship/output_weights",
...
...
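
The added table entry above feeds the same substring-rewrite mechanism as its neighbors: each (old, new) pair is applied to TF1 variable names in order. A toy illustration (the rename helper below is hypothetical, not the library's code):

    def rename(var_name, replacements):
      # Apply each (old, new) substring replacement in order.
      for old, new in replacements:
        var_name = var_name.replace(old, new)
      return var_name

    print(rename('bert/pooler/dense/kernel',
                 (('pooler/dense', 'pooler_transform'),)))
    # -> 'bert/pooler_transform/kernel'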
@@ -111,11 +109,20 @@ def _get_new_shape(name, shape, num_heads):
     return None


-def create_v2_checkpoint(model, src_checkpoint, output_path):
+def create_v2_checkpoint(model,
+                         src_checkpoint,
+                         output_path,
+                         checkpoint_model_name="model"):
   """Converts a name-based matched TF V1 checkpoint to TF V2 checkpoint."""
   # Uses streaming-restore in eager mode to read V1 name-based checkpoints.
   model.load_weights(src_checkpoint).assert_existing_objects_matched()
-  checkpoint = tf.train.Checkpoint(model=model)
+  if hasattr(model, "checkpoint_items"):
+    checkpoint_items = model.checkpoint_items
+  else:
+    checkpoint_items = {}
+  checkpoint_items[checkpoint_model_name] = model
+  checkpoint = tf.train.Checkpoint(**checkpoint_items)
   checkpoint.save(output_path)
...
...
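
The new checkpoint_items branch lets a model publish extra trackables that are saved next to the model under user-chosen keys, rather than the old hard-coded model= key. A toy illustration, with a hypothetical ToyModel exposing a checkpoint_items property:

    import tensorflow as tf

    class ToyModel(tf.keras.Model):

      def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(4)

      @property
      def checkpoint_items(self):
        # Extra trackables to save alongside the model itself.
        return {'encoder': self.dense}

    model = ToyModel()
    items = dict(model.checkpoint_items) if hasattr(model, 'checkpoint_items') else {}
    items['model'] = model
    ckpt = tf.train.Checkpoint(**items)
    # ckpt.save('/tmp/ckpt') writes variables under both 'encoder/' and 'model/'.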
@@ -164,7 +171,6 @@ def convert(checkpoint_from_path,
       new_shape = _get_new_shape(new_var_name, tensor.shape, num_heads)
       if new_shape:
         tf.logging.info("Variable %s has a shape change from %s to %s",
                         var_name, tensor.shape, new_shape)
         tensor = np.reshape(tensor, new_shape)
...
...
official/nlp/bert/tf2_encoder_checkpoint_converter.py  View file @ f16a7b5b
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -11,15 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
 """A converter from a V1 BERT encoder checkpoint to a V2 encoder checkpoint.

 The conversion will yield an object-oriented checkpoint that can be used
-to restore a TransformerEncoder object.
+to restore a BertEncoder or BertPretrainerV2 object (see the `converted_model`
+FLAG below).
 """

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import os
...
...
@@ -27,9 +25,10 @@ from absl import app
 from absl import flags
 import tensorflow as tf

-from official.modeling import activations
+from official.modeling import tf_utils
 from official.nlp.bert import configs
 from official.nlp.bert import tf1_checkpoint_converter_lib
+from official.nlp.modeling import models
 from official.nlp.modeling import networks

 FLAGS = flags.FLAGS
...
...
@@ -42,6 +41,14 @@ flags.DEFINE_string(
     "BertModel, with no task heads.)")

 flags.DEFINE_string("converted_checkpoint_path", None,
                     "Name for the created object-based V2 checkpoint.")

+flags.DEFINE_string("checkpoint_model_name", "encoder",
+                    "The name of the model when saving the checkpoint, i.e., "
+                    "the checkpoint will be saved using: "
+                    "tf.train.Checkpoint(FLAGS.checkpoint_model_name=model).")
+
+flags.DEFINE_enum(
+    "converted_model", "encoder", ["encoder", "pretrainer"],
+    "Whether to convert the checkpoint to a `BertEncoder` model or a "
+    "`BertPretrainerV2` model (with mlm but without classification heads).")


 def _create_bert_model(cfg):
...
...
@@ -49,19 +56,20 @@ def _create_bert_model(cfg):
   Args:
     cfg: A `BertConfig` to create the core model.

   Returns:
-    A TransformerEncoder network.
+    A BertEncoder network.
   """
-  bert_encoder = networks.TransformerEncoder(
+  bert_encoder = networks.BertEncoder(
       vocab_size=cfg.vocab_size,
       hidden_size=cfg.hidden_size,
       num_layers=cfg.num_hidden_layers,
       num_attention_heads=cfg.num_attention_heads,
       intermediate_size=cfg.intermediate_size,
-      activation=activations.gelu,
+      activation=tf_utils.get_activation(cfg.hidden_act),
       dropout_rate=cfg.hidden_dropout_prob,
       attention_dropout_rate=cfg.attention_probs_dropout_prob,
-      sequence_length=cfg.max_position_embeddings,
+      max_sequence_length=cfg.max_position_embeddings,
       type_vocab_size=cfg.type_vocab_size,
       initializer=tf.keras.initializers.TruncatedNormal(
           stddev=cfg.initializer_range),
...
...
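
Two of these replacements are behavioral rather than cosmetic: the activation is now looked up from the config instead of being hard-coded to gelu, and sequence_length becomes max_sequence_length. A quick sketch of the config-driven lookup, assuming cfg.hidden_act is 'gelu' (its usual value):

    from official.modeling import tf_utils

    # Resolves an activation name from the BERT config to a callable.
    gelu_fn = tf_utils.get_activation('gelu')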
@@ -70,13 +78,39 @@ def _create_bert_model(cfg):
   return bert_encoder


-def convert_checkpoint(bert_config, output_path, v1_checkpoint):
+def _create_bert_pretrainer_model(cfg):
+  """Creates a BERT keras core model from BERT configuration.
+
+  Args:
+    cfg: A `BertConfig` to create the core model.
+
+  Returns:
+    A BertPretrainerV2 model.
+  """
+  bert_encoder = _create_bert_model(cfg)
+  pretrainer = models.BertPretrainerV2(
+      encoder_network=bert_encoder,
+      mlm_activation=tf_utils.get_activation(cfg.hidden_act),
+      mlm_initializer=tf.keras.initializers.TruncatedNormal(
+          stddev=cfg.initializer_range))
+  # Makes sure the pretrainer variables are created.
+  _ = pretrainer(pretrainer.inputs)
+  return pretrainer
+
+
+def convert_checkpoint(bert_config,
+                       output_path,
+                       v1_checkpoint,
+                       checkpoint_model_name="model",
+                       converted_model="encoder"):
   """Converts a V1 checkpoint into an OO V2 checkpoint."""
   output_dir, _ = os.path.split(output_path)
   tf.io.gfile.makedirs(output_dir)

   # Create a temporary V1 name-converted checkpoint in the output directory.
   temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1")
   temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt")
   tf1_checkpoint_converter_lib.convert(
       checkpoint_from_path=v1_checkpoint,
       checkpoint_to_path=temporary_checkpoint,
...
...
@@ -85,10 +119,17 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint):
       permutations=tf1_checkpoint_converter_lib.BERT_V2_PERMUTATIONS,
       exclude_patterns=["adam", "Adam"])

+  if converted_model == "encoder":
+    model = _create_bert_model(bert_config)
+  elif converted_model == "pretrainer":
+    model = _create_bert_pretrainer_model(bert_config)
+  else:
+    raise ValueError("Unsupported converted_model: %s" % converted_model)
+
   # Create a V2 checkpoint from the temporary checkpoint.
-  model = _create_bert_model(bert_config)
   tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint,
-                                                    output_path)
+                                                    output_path,
+                                                    checkpoint_model_name)

   # Clean up the temporary checkpoint, if it exists.
   try:
...
...
@@ -98,11 +139,21 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint):
     pass


-def main(_):
+def main(argv):
+  if len(argv) > 1:
+    raise app.UsageError("Too many command-line arguments.")
+
   output_path = FLAGS.converted_checkpoint_path
   v1_checkpoint = FLAGS.checkpoint_to_convert
+  checkpoint_model_name = FLAGS.checkpoint_model_name
+  converted_model = FLAGS.converted_model
   bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
-  convert_checkpoint(bert_config, output_path, v1_checkpoint)
+  convert_checkpoint(
+      bert_config=bert_config,
+      output_path=output_path,
+      v1_checkpoint=v1_checkpoint,
+      checkpoint_model_name=checkpoint_model_name,
+      converted_model=converted_model)


 if __name__ == "__main__":
...
...
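
Putting the new flags together, a minimal sketch of driving the conversion from Python rather than the CLI (paths are placeholders, and the import assumes the converter script's module is on the path):

    from official.nlp.bert import configs
    from official.nlp.bert import tf2_encoder_checkpoint_converter as converter

    bert_config = configs.BertConfig.from_json_file('/path/to/bert_config.json')
    converter.convert_checkpoint(
        bert_config=bert_config,
        output_path='/path/to/v2_checkpoint/ckpt',
        v1_checkpoint='/path/to/tf1/bert_model.ckpt',
        checkpoint_model_name='encoder',   # key used in tf.train.Checkpoint(...)
        converted_model='pretrainer')      # or 'encoder'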
official/nlp/bert/tokenization.py  View file @ f16a7b5b
-# coding=utf-8
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -12,17 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+# coding=utf-8
 """Tokenization classes implementation.

 The file is forked from:
 https://github.com/google-research/bert/blob/master/tokenization.py.
 """

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import collections
 import re
 import unicodedata
...
...
@@ -421,7 +417,7 @@ def preprocess_text(inputs, remove_space=True, lower=False):
   """Preprocesses data by removing extra spaces and normalizing data.

   This method is used together with sentence piece tokenizer and is forked from:
-  https://github.com/google-research/google-research/blob/master/albert/tokenization.py
+  https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py

   Args:
     inputs: The input text.
...
...
@@ -454,7 +450,7 @@ def encode_pieces(sp_model, text, sample=False):
   """Segments text into pieces.

   This method is used together with sentence piece tokenizer and is forked from:
-  https://github.com/google-research/google-research/blob/master/albert/tokenization.py
+  https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py

   Args:
...
...
@@ -496,7 +492,7 @@ def encode_ids(sp_model, text, sample=False):
   """Segments text and returns token ids.

   This method is used together with sentence piece tokenizer and is forked from:
-  https://github.com/google-research/google-research/blob/master/albert/tokenization.py
+  https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py

   Args:
     sp_model: A spm.SentencePieceProcessor object.
...
...
official/nlp/bert/tokenization_test.py  View file @ f16a7b5b
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -11,10 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function

 import os
 import tempfile
...
...
official/nlp/configs/__init__.py  View file @ f16a7b5b
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
official/nlp/configs/bert.py  View file @ f16a7b5b
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -12,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
 """Multi-head BERT encoder network with classification heads.

 Includes configurations and instantiation methods.
...
...
@@ -20,13 +19,9 @@ Includes configurations and instantiation methods.
 from typing import List, Optional, Text

 import dataclasses
-import tensorflow as tf
-from official.modeling import tf_utils
 from official.modeling.hyperparams import base_config
 from official.nlp.configs import encoders
 from official.nlp.modeling import layers
-from official.nlp.modeling.models import bert_pretrainer


 @dataclasses.dataclass
...
...
@@ -40,32 +35,9 @@ class ClsHeadConfig(base_config.Config):

 @dataclasses.dataclass
-class BertPretrainerConfig(base_config.Config):
-  """BERT encoder configuration."""
-  encoder: encoders.TransformerEncoderConfig = (
-      encoders.TransformerEncoderConfig())
+class PretrainerConfig(base_config.Config):
+  """Pretrainer configuration."""
+  encoder: encoders.EncoderConfig = encoders.EncoderConfig()
   cls_heads: List[ClsHeadConfig] = dataclasses.field(default_factory=list)
-
-
-def instantiate_classification_heads_from_cfgs(
-    cls_head_configs: List[ClsHeadConfig]) -> List[layers.ClassificationHead]:
-  return [
-      layers.ClassificationHead(**cfg.as_dict()) for cfg in cls_head_configs
-  ] if cls_head_configs else []
-
-
-def instantiate_pretrainer_from_cfg(
-    config: BertPretrainerConfig,
-    encoder_network: Optional[tf.keras.Model] = None
-) -> bert_pretrainer.BertPretrainerV2:
-  """Instantiates a BertPretrainer from the config."""
-  encoder_cfg = config.encoder
-  if encoder_network is None:
-    encoder_network = encoders.instantiate_encoder_from_cfg(encoder_cfg)
-  return bert_pretrainer.BertPretrainerV2(
-      mlm_activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
-      mlm_initializer=tf.keras.initializers.TruncatedNormal(
-          stddev=encoder_cfg.initializer_range),
-      encoder_network=encoder_network,
-      classification_heads=instantiate_classification_heads_from_cfgs(
-          config.cls_heads))
+  mlm_activation: str = "gelu"
+  mlm_initializer_range: float = 0.02
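
A minimal sketch of the slimmer config surface after this change; the field values shown are the defaults from the hunk above:

    from official.nlp.configs import bert
    from official.nlp.configs import encoders

    config = bert.PretrainerConfig(
        encoder=encoders.EncoderConfig(),
        cls_heads=[],                 # optional ClsHeadConfig entries
        mlm_activation='gelu',
        mlm_initializer_range=0.02)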