ModelZoo / ResNet50_tensorflow · Commits · 83f0a576

Commit 83f0a576
Authored Nov 22, 2019 by Chen Chen; committed by saberkun, Nov 22, 2019
Parent: 986ffac4

Internal change

PiperOrigin-RevId: 282096004

Showing 4 changed files with 64 additions and 211 deletions (+64, -211)
official/modeling/model_training_utils.py   +23    -2
official/nlp/bert/model_saving_utils.py      +0   -31
official/nlp/bert/run_pretraining.py         +2    -9
official/nlp/bert_models.py                 +39  -169
official/modeling/model_training_utils.py  (+23, -2)

@@ -94,7 +94,8 @@ def run_customized_training_loop(
     metric_fn=None,
     init_checkpoint=None,
     custom_callbacks=None,
-    run_eagerly=False):
+    run_eagerly=False,
+    sub_model_export_name=None):
   """Run BERT pretrain model training using low-level API.

   Arguments:
@@ -131,6 +132,11 @@ def run_customized_training_loop(
       methods are invoked during training.
     run_eagerly: Whether to run model training in pure eager execution. This
       should be disabled for TPUStrategy.
+    sub_model_export_name: If not None, will export `sub_model` returned by
+      `model_fn` into checkpoint files. The name of an intermediate checkpoint
+      file is {sub_model_export_name}_step_{step}.ckpt and the last
+      checkpoint's name is {sub_model_export_name}.ckpt;
+      if None, `sub_model` will not be exported as a checkpoint.

   Returns:
     Trained model.
@@ -139,6 +145,8 @@ def run_customized_training_loop(
     ValueError: (1) When model returned by `model_fn` does not have optimizer
       attribute or when required parameters are set to none. (2) eval args are
       not specified correctly. (3) metric_fn must be a callable if specified.
+      (4) sub_model_export_name is specified, but `sub_model` returned by
+      `model_fn` is None.
   """

   if _sentinel is not None:
@@ -191,6 +199,10 @@ def run_customized_training_loop(
     if not hasattr(model, 'optimizer'):
       raise ValueError('User should set optimizer attribute to model '
                        'inside `model_fn`.')
+    if sub_model_export_name and sub_model is None:
+      raise ValueError('sub_model_export_name is specified as %s, but '
+                       'sub_model is None.' % sub_model_export_name)
+
     optimizer = model.optimizer
     use_float16 = isinstance(
         optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer)
@@ -326,6 +338,9 @@ def run_customized_training_loop(
     # Training loop starts here.
     checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
+    sub_model_checkpoint = tf.train.Checkpoint(
+        model=sub_model) if sub_model_export_name else None
+
     latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
     if latest_checkpoint_file:
       logging.info(
@@ -382,7 +397,10 @@ def run_customized_training_loop(
       if current_step < total_training_steps:
         _save_checkpoint(checkpoint, model_dir,
                          checkpoint_name.format(step=current_step))
+        if sub_model_export_name:
+          _save_checkpoint(
+              sub_model_checkpoint, model_dir,
+              '%s_step_%d.ckpt' % (sub_model_export_name, current_step))
         if eval_input_fn:
           logging.info('Running evaluation after step: %s.', current_step)
           _run_evaluation(current_step,
@@ -393,6 +411,9 @@ def run_customized_training_loop(
     _save_checkpoint(checkpoint, model_dir,
                      checkpoint_name.format(step=current_step))
+    if sub_model_export_name:
+      _save_checkpoint(sub_model_checkpoint, model_dir,
+                       '%s.ckpt' % sub_model_export_name)

     if eval_input_fn:
       logging.info('Running final evaluation after training is complete.')
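The export path added above is just a second tf.train.Checkpoint that tracks only the sub-model and is saved under the formatted names from the docstring. Below is a minimal, self-contained sketch of that pattern, not part of the diff; the toy Keras models, optimizer, step count, and paths are placeholders, not the BERT objects the real loop receives from `model_fn` (run_pretraining.py passes 'pretrained/bert_model' as the export name).

import os
import tensorflow as tf

model_dir = '/tmp/pretrain_demo'        # placeholder directory
sub_model_export_name = 'bert_model'    # real run: 'pretrained/bert_model'
current_step = 1000
os.makedirs(model_dir, exist_ok=True)

# Stand-ins for the objects returned by `model_fn` in the real loop.
sub_model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
model = tf.keras.Sequential([sub_model, tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.SGD()

# Full training checkpoint (model + optimizer), as the loop already keeps,
# plus the new sub-model-only checkpoint.
checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
sub_model_checkpoint = tf.train.Checkpoint(model=sub_model)

# Intermediate export, mirroring the branch added inside the step loop.
# Note: Checkpoint.save() appends a save counter, e.g. '..._step_1000.ckpt-1'.
sub_model_checkpoint.save(
    os.path.join(model_dir,
                 '%s_step_%d.ckpt' % (sub_model_export_name, current_step)))
# Final export once training completes.
sub_model_checkpoint.save(
    os.path.join(model_dir, '%s.ckpt' % sub_model_export_name))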
official/nlp/bert/model_saving_utils.py  (+0, -31)

@@ -77,37 +77,6 @@ def export_bert_model(model_export_path: typing.Text,
   model.save(model_export_path, include_optimizer=False, save_format='tf')


-def export_pretraining_checkpoint(
-    checkpoint_dir: typing.Text,
-    model: tf.keras.Model,
-    checkpoint_name: typing.Optional[typing.Text] = 'pretrained/bert_model.ckpt'):
-  """Exports the BERT model as a checkpoint without the optimizer.
-
-  Arguments:
-    checkpoint_dir: Path to where training model checkpoints are stored.
-    model: Keras model object to export.
-    checkpoint_name: File name or suffix path to export pretrained checkpoint.
-
-  Raises:
-    ValueError when either checkpoint_dir or model is not specified.
-  """
-  if not checkpoint_dir:
-    raise ValueError('checkpoint_dir must be specified.')
-  if not isinstance(model, tf.keras.Model):
-    raise ValueError('model must be a tf.keras.Model object.')
-
-  checkpoint = tf.train.Checkpoint(model=model)
-  latest_checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
-  assert latest_checkpoint_file
-  logging.info('Checkpoint file %s found and restoring from '
-               'checkpoint', latest_checkpoint_file)
-  status = checkpoint.restore(latest_checkpoint_file)
-  status.assert_existing_objects_matched().expect_partial()
-  saved_path = checkpoint.save(os.path.join(checkpoint_dir, checkpoint_name))
-  logging.info('Exporting the model as a new TF checkpoint: %s', saved_path)
-
-
 class BertModelCheckpoint(tf.keras.callbacks.Callback):
   """Keras callback that saves model at the end of every epoch."""
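With export_pretraining_checkpoint removed, the encoder-only checkpoint is written during training rather than re-derived afterwards. On the consuming side, loading one of those files reduces to the same tf.train.Checkpoint calls the deleted helper used. A hedged sketch follows, continuing the toy example above; the model and directory are placeholders, and the restore-status assertions mirror the deleted helper.

import tensorflow as tf

# Stand-in for the core encoder to fine-tune; it must define the same
# variables as the `sub_model` tracked during pretraining (here, the toy
# sub_model from the sketch above).
core_model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])

checkpoint = tf.train.Checkpoint(model=core_model)
# Picks up the last export written above; a real run would instead point at a
# specific '<model_dir>/pretrained/bert_model.ckpt-<N>' prefix.
latest = tf.train.latest_checkpoint('/tmp/pretrain_demo')
status = checkpoint.restore(latest)
status.assert_existing_objects_matched().expect_partial()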
official/nlp/bert/run_pretraining.py  (+2, -9)

@@ -126,16 +126,9 @@ def run_customized_training(strategy,
       train_input_fn=train_input_fn,
       steps_per_epoch=steps_per_epoch,
       steps_per_loop=steps_per_loop,
-      epochs=epochs)
+      epochs=epochs,
+      sub_model_export_name='pretrained/bert_model')

-  # Creates the BERT core model outside distribution strategy scope.
-  _, core_model = bert_models.pretrain_model(bert_config, max_seq_length,
-                                             max_predictions_per_seq)
-
-  # Restores the core model from the model checkpoints and gets a new
-  # checkpoint that only contains the core model.
-  model_saving_utils.export_pretraining_checkpoint(
-      checkpoint_dir=model_dir, model=core_model)
   return trained_model
official/nlp/bert_models.py  (+39, -169)

@@ -18,139 +18,26 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import copy
 import tensorflow as tf
 import tensorflow_hub as hub

 from official.modeling import tf_utils
-from official.nlp import bert_modeling as modeling
+from official.nlp.modeling import losses
 from official.nlp.modeling import networks
 from official.nlp.modeling.networks import bert_classifier
+from official.nlp.modeling.networks import bert_pretrainer
 from official.nlp.modeling.networks import bert_span_labeler


-def gather_indexes(sequence_tensor, positions):
-  """Gathers the vectors at the specific positions.
-
-  Args:
-    sequence_tensor: Sequence output of `BertModel` layer of shape
-      (`batch_size`, `seq_length`, num_hidden) where num_hidden is the number
-      of hidden units of the `BertModel` layer.
-    positions: Position ids of the tokens in the sequence to mask for
-      pretraining, with dimension (batch_size, max_predictions_per_seq) where
-      `max_predictions_per_seq` is the maximum number of tokens to mask out
-      and predict per sequence.
-
-  Returns:
-    Masked out sequence tensor of shape (batch_size * max_predictions_per_seq,
-    num_hidden).
-  """
-  sequence_shape = tf_utils.get_shape_list(
-      sequence_tensor, name='sequence_output_tensor')
-  batch_size = sequence_shape[0]
-  seq_length = sequence_shape[1]
-  width = sequence_shape[2]
-
-  flat_offsets = tf.keras.backend.reshape(
-      tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
-  flat_positions = tf.keras.backend.reshape(positions + flat_offsets, [-1])
-  flat_sequence_tensor = tf.keras.backend.reshape(
-      sequence_tensor, [batch_size * seq_length, width])
-  output_tensor = tf.gather(flat_sequence_tensor, flat_positions)
-
-  return output_tensor
-
-
-class BertPretrainLayer(tf.keras.layers.Layer):
-  """Wrapper layer for pre-training a BERT model.
-
-  This layer wraps an existing `bert_layer`, which is a Keras Layer.
-  It outputs `sequence_output` from the TransformerBlock sub-layer and
-  `sentence_output`, which are suitable for feeding into a BertPretrainLoss
-  layer. This layer can be used along with an unsupervised input to
-  pre-train the embeddings for `bert_layer`.
-  """
-
-  def __init__(self,
-               config,
-               bert_layer,
-               initializer=None,
-               float_type=tf.float32,
-               **kwargs):
-    super(BertPretrainLayer, self).__init__(**kwargs)
-    self.config = copy.deepcopy(config)
-    self.float_type = float_type
-
-    self.embedding_table = bert_layer.embedding_lookup.embeddings
-    self.num_next_sentence_label = 2
-    if initializer:
-      self.initializer = initializer
-    else:
-      self.initializer = tf.keras.initializers.TruncatedNormal(
-          stddev=self.config.initializer_range)
-
-  def build(self, unused_input_shapes):
-    """Implements build() for the layer."""
-    self.output_bias = self.add_weight(
-        shape=[self.config.vocab_size],
-        name='predictions/output_bias',
-        initializer=tf.keras.initializers.Zeros())
-    self.lm_dense = tf.keras.layers.Dense(
-        self.config.hidden_size,
-        activation=tf_utils.get_activation(self.config.hidden_act),
-        kernel_initializer=self.initializer,
-        name='predictions/transform/dense')
-    self.lm_layer_norm = tf.keras.layers.LayerNormalization(
-        axis=-1, epsilon=1e-12, name='predictions/transform/LayerNorm')
-
-    # Next sentence binary classification dense layer including bias to match
-    # TF1.x BERT variable shapes.
-    with tf.name_scope('seq_relationship'):
-      self.next_seq_weights = self.add_weight(
-          shape=[self.num_next_sentence_label, self.config.hidden_size],
-          name='output_weights',
-          initializer=self.initializer)
-      self.next_seq_bias = self.add_weight(
-          shape=[self.num_next_sentence_label],
-          name='output_bias',
-          initializer=tf.keras.initializers.Zeros())
-    super(BertPretrainLayer, self).build(unused_input_shapes)
-
-  def __call__(self,
-               pooled_output,
-               sequence_output=None,
-               masked_lm_positions=None,
-               **kwargs):
-    inputs = tf_utils.pack_inputs(
-        [pooled_output, sequence_output, masked_lm_positions])
-    return super(BertPretrainLayer, self).__call__(inputs, **kwargs)
-
-  def call(self, inputs):
-    """Implements call() for the layer."""
-    unpacked_inputs = tf_utils.unpack_inputs(inputs)
-    pooled_output = unpacked_inputs[0]
-    sequence_output = unpacked_inputs[1]
-    masked_lm_positions = unpacked_inputs[2]
-
-    mask_lm_input_tensor = gather_indexes(sequence_output, masked_lm_positions)
-    lm_output = self.lm_dense(mask_lm_input_tensor)
-    lm_output = self.lm_layer_norm(lm_output)
-    lm_output = tf.matmul(lm_output, self.embedding_table, transpose_b=True)
-    lm_output = tf.nn.bias_add(lm_output, self.output_bias)
-    lm_output = tf.nn.log_softmax(lm_output, axis=-1)
-
-    logits = tf.matmul(pooled_output, self.next_seq_weights, transpose_b=True)
-    logits = tf.nn.bias_add(logits, self.next_seq_bias)
-    sentence_output = tf.nn.log_softmax(logits, axis=-1)
-    return (lm_output, sentence_output)
-
-
 class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
   """Returns layer that computes custom loss and metrics for pretraining."""

-  def __init__(self, bert_config, **kwargs):
+  def __init__(self, vocab_size, **kwargs):
     super(BertPretrainLossAndMetricLayer, self).__init__(**kwargs)
-    self.config = copy.deepcopy(bert_config)
+    self._vocab_size = vocab_size
+    self.config = {
+        'vocab_size': vocab_size,
+    }

   def __call__(self,
                lm_output,
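For reference, the gathering that the deleted gather_indexes performed (and that BertPretrainer now handles internally) is plain offset arithmetic plus tf.gather over a flattened sequence tensor. A tiny self-contained sketch with made-up toy shapes:

import tensorflow as tf

# Toy "sequence output": batch of 2 sequences, length 3, hidden width 4.
sequence_tensor = tf.reshape(tf.range(24, dtype=tf.float32), [2, 3, 4])
positions = tf.constant([[0, 2], [1, 2]], dtype=tf.int32)  # masked positions
batch_size, seq_length, width = 2, 3, 4

# Offset each row's positions into the flattened (batch*seq_len, width) view.
flat_offsets = tf.reshape(
    tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
flat_positions = tf.reshape(positions + flat_offsets, [-1])       # [0, 2, 4, 5]
flat_sequence = tf.reshape(sequence_tensor, [batch_size * seq_length, width])
gathered = tf.gather(flat_sequence, flat_positions)               # shape (4, 4)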
@@ -167,8 +54,8 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
                  self).__call__(inputs, **kwargs)

   def _add_metrics(self, lm_output, lm_labels, lm_label_weights,
-                   lm_per_example_loss, sentence_output, sentence_labels,
-                   sentence_per_example_loss):
+                   lm_example_loss, sentence_output, sentence_labels,
+                   next_sentence_loss):
     """Adds metrics."""
     masked_lm_accuracy = tf.keras.metrics.sparse_categorical_accuracy(
         lm_labels, lm_output)
@@ -178,8 +65,6 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
     self.add_metric(
         masked_lm_accuracy, name='masked_lm_accuracy', aggregation='mean')

-    lm_example_loss = tf.reshape(lm_per_example_loss, [-1])
-    lm_example_loss = tf.reduce_mean(lm_example_loss * lm_label_weights)
     self.add_metric(lm_example_loss, name='lm_example_loss', aggregation='mean')

     next_sentence_accuracy = tf.keras.metrics.sparse_categorical_accuracy(
@@ -189,9 +74,8 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
         name='next_sentence_accuracy',
         aggregation='mean')

-    next_sentence_mean_loss = tf.reduce_mean(sentence_per_example_loss)
     self.add_metric(
-        next_sentence_mean_loss, name='next_sentence_loss', aggregation='mean')
+        next_sentence_loss, name='next_sentence_loss', aggregation='mean')

   def call(self, inputs):
     """Implements call() for the layer."""
@@ -199,31 +83,21 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
     lm_output = unpacked_inputs[0]
     sentence_output = unpacked_inputs[1]
     lm_label_ids = unpacked_inputs[2]
-    lm_label_ids = tf.keras.backend.reshape(lm_label_ids, [-1])
-    lm_label_ids_one_hot = tf.keras.backend.one_hot(lm_label_ids,
-                                                    self.config.vocab_size)
     lm_label_weights = tf.keras.backend.cast(unpacked_inputs[3], tf.float32)
-    lm_label_weights = tf.keras.backend.reshape(lm_label_weights, [-1])
-    lm_per_example_loss = -tf.keras.backend.sum(
-        lm_output * lm_label_ids_one_hot, axis=[-1])
-    numerator = tf.keras.backend.sum(lm_label_weights * lm_per_example_loss)
-    denominator = tf.keras.backend.sum(lm_label_weights) + 1e-5
-    mask_label_loss = numerator / denominator
-
     sentence_labels = unpacked_inputs[4]
-    sentence_labels = tf.keras.backend.reshape(sentence_labels, [-1])
-    sentence_label_one_hot = tf.keras.backend.one_hot(sentence_labels, 2)
-    per_example_loss_sentence = -tf.keras.backend.sum(
-        sentence_label_one_hot * sentence_output, axis=-1)
-    sentence_loss = tf.keras.backend.mean(per_example_loss_sentence)
+
+    mask_label_loss = losses.weighted_sparse_categorical_crossentropy_loss(
+        labels=lm_label_ids, predictions=lm_output, weights=lm_label_weights)
+    sentence_loss = losses.weighted_sparse_categorical_crossentropy_loss(
+        labels=sentence_labels, predictions=sentence_output)
     loss = mask_label_loss + sentence_loss

+    batch_shape = tf.slice(tf.keras.backend.shape(sentence_labels), [0], [1])
     # TODO(hongkuny): Avoids the hack and switches add_loss.
-    final_loss = tf.fill(
-        tf.keras.backend.shape(per_example_loss_sentence), loss)
+    final_loss = tf.fill(batch_shape, loss)

     self._add_metrics(lm_output, lm_label_ids, lm_label_weights,
-                      lm_per_example_loss, sentence_output, sentence_labels,
-                      per_example_loss_sentence)
+                      mask_label_loss, sentence_output, sentence_labels,
+                      sentence_loss)
     return final_loss
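The inline masked-LM loss deleted above is an ordinary weighted sparse categorical cross-entropy over log-probabilities; presumably this is what losses.weighted_sparse_categorical_crossentropy_loss encapsulates, though the snippet below only reproduces the removed arithmetic, not the library helper itself. Shapes and values are made up for illustration.

import tensorflow as tf

# Toy log-probabilities over a vocabulary of 5 for 3 masked predictions.
log_probs = tf.nn.log_softmax(tf.random.normal([3, 5]), axis=-1)
label_ids = tf.constant([2, 0, 4])
label_weights = tf.constant([1.0, 1.0, 0.0])  # padded prediction gets weight 0

# Same arithmetic as the removed inline code.
one_hot = tf.one_hot(label_ids, depth=5)
per_example_loss = -tf.reduce_sum(log_probs * one_hot, axis=-1)
numerator = tf.reduce_sum(label_weights * per_example_loss)
denominator = tf.reduce_sum(label_weights) + 1e-5
masked_lm_loss = numerator / denominator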
@@ -268,13 +142,12 @@ def pretrain_model(bert_config,
     seq_length: Maximum sequence length of the training data.
     max_predictions_per_seq: Maximum number of tokens in sequence to mask out
       and use for pretraining.
-    initializer: Initializer for weights in BertPretrainLayer.
+    initializer: Initializer for weights in BertPretrainer.

   Returns:
     Pretraining model as well as core BERT submodel from which to save
     weights after pretraining.
   """
   input_word_ids = tf.keras.layers.Input(
       shape=(seq_length,), name='input_word_ids', dtype=tf.int32)
   input_mask = tf.keras.layers.Input(
@@ -285,38 +158,34 @@ def pretrain_model(bert_config,
       shape=(max_predictions_per_seq,),
       name='masked_lm_positions',
       dtype=tf.int32)
-  masked_lm_ids = tf.keras.layers.Input(
-      shape=(max_predictions_per_seq,), name='masked_lm_ids', dtype=tf.int32)
   masked_lm_weights = tf.keras.layers.Input(
       shape=(max_predictions_per_seq,),
       name='masked_lm_weights',
       dtype=tf.int32)
   next_sentence_labels = tf.keras.layers.Input(
       shape=(1,), name='next_sentence_labels', dtype=tf.int32)
+  masked_lm_ids = tf.keras.layers.Input(
+      shape=(max_predictions_per_seq,), name='masked_lm_ids', dtype=tf.int32)

-  bert_submodel_name = 'bert_model'
-  bert_submodel = modeling.get_bert_model(
-      input_word_ids,
-      input_mask,
-      input_type_ids,
-      name=bert_submodel_name,
-      config=bert_config)
-  pooled_output = bert_submodel.outputs[0]
-  sequence_output = bert_submodel.outputs[1]
+  transformer_encoder = _get_transformer_encoder(bert_config, seq_length)
+  if initializer is None:
+    initializer = tf.keras.initializers.TruncatedNormal(
+        stddev=bert_config.initializer_range)
+  pretrainer_model = bert_pretrainer.BertPretrainer(
+      network=transformer_encoder,
+      num_classes=2,  # The next sentence prediction label has two classes.
+      num_token_predictions=max_predictions_per_seq,
+      initializer=initializer,
+      output='predictions')

-  pretrain_layer = BertPretrainLayer(
-      bert_config,
-      bert_submodel.get_layer(bert_submodel_name),
-      initializer=initializer,
-      name='cls')
-  lm_output, sentence_output = pretrain_layer(pooled_output, sequence_output,
-                                              masked_lm_positions)
+  lm_output, sentence_output = pretrainer_model(
+      [input_word_ids, input_mask, input_type_ids, masked_lm_positions])

-  pretrain_loss_layer = BertPretrainLossAndMetricLayer(bert_config)
+  pretrain_loss_layer = BertPretrainLossAndMetricLayer(
+      vocab_size=bert_config.vocab_size)
   output_loss = pretrain_loss_layer(lm_output, sentence_output, masked_lm_ids,
                                     masked_lm_weights, next_sentence_labels)
-  return tf.keras.Model(
+  keras_model = tf.keras.Model(
       inputs={
           'input_word_ids': input_word_ids,
           'input_mask': input_mask,
@@ -326,7 +195,8 @@ def pretrain_model(bert_config,
           'masked_lm_weights': masked_lm_weights,
           'next_sentence_labels': next_sentence_labels,
       },
-      outputs=output_loss), bert_submodel
+      outputs=output_loss)
+  return keras_model, transformer_encoder


 class BertSquadLogitsLayer(tf.keras.layers.Layer):
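Note that the functional model still returns the loss itself as its output, tiled to batch shape (hence the retained TODO about switching to add_loss). A toy, self-contained illustration of that pattern follows; the layer, input names, and the squared-error loss are made up for the sketch and are not the BERT layers above.

import tensorflow as tf

class ToyLossLayer(tf.keras.layers.Layer):
  """Computes a scalar loss and tiles it to batch shape, like the BERT layer."""

  def call(self, inputs):
    predictions, labels = inputs
    per_example_loss = tf.square(predictions - labels)
    loss = tf.reduce_mean(per_example_loss)
    batch_shape = tf.slice(tf.shape(labels), [0], [1])
    return tf.fill(batch_shape, loss)

features = tf.keras.layers.Input(shape=(4,), name='features')
labels = tf.keras.layers.Input(shape=(1,), name='labels')
predictions = tf.keras.layers.Dense(1)(features)
output_loss = ToyLossLayer()([predictions, labels])
toy_model = tf.keras.Model(inputs={'features': features, 'labels': labels},
                           outputs=output_loss)
# A custom training loop can then minimize the model's output directly.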