ModelZoo / ResNet50_tensorflow · Commits

Commit 8856b918
Authored Aug 20, 2019 by Reed Wanderman-Milne
Committed by A. Unique TensorFlower on Aug 20, 2019
Parent: dab0c03a

Internal change

PiperOrigin-RevId: 264500330
Showing 2 changed files with 20 additions and 5 deletions:

  official/bert/modeling.py   (+14, -4)
  official/bert/run_squad.py  (+6, -1)
official/bert/modeling.py
@@ -165,6 +165,7 @@ class BertModel(tf.keras.layers.Layer):
         max_position_embeddings=self.config.max_position_embeddings,
         dropout_prob=self.config.hidden_dropout_prob,
         initializer_range=self.config.initializer_range,
+        dtype=tf.float32,
         name="embedding_postprocessor")
     self.encoder = Transformer(
         num_hidden_layers=self.config.num_hidden_layers,
@@ -316,8 +317,9 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
         dtype=self.dtype)

     self.output_layer_norm = tf.keras.layers.LayerNormalization(
-        name="layer_norm", axis=-1, epsilon=1e-12)
-    self.output_dropout = tf.keras.layers.Dropout(rate=self.dropout_prob)
+        name="layer_norm", axis=-1, epsilon=1e-12, dtype=tf.float32)
+    self.output_dropout = tf.keras.layers.Dropout(
+        rate=self.dropout_prob, dtype=tf.float32)
     super(EmbeddingPostprocessor, self).build(input_shapes)

   def __call__(self, word_embeddings, token_type_ids=None, **kwargs):
@@ -714,11 +716,15 @@ class TransformerBlock(tf.keras.layers.Layer):
         rate=self.hidden_dropout_prob)
     self.attention_layer_norm = (
         tf.keras.layers.LayerNormalization(
-            name="self_attention_layer_norm", axis=-1, epsilon=1e-12))
+            name="self_attention_layer_norm", axis=-1, epsilon=1e-12,
+            # We do layer norm in float32 for numeric stability.
+            dtype=tf.float32))
     self.intermediate_dense = Dense2DProjection(
         output_size=self.intermediate_size,
         kernel_initializer=get_initializer(self.initializer_range),
         activation=self.intermediate_activation,
+        # Uses float32 so that gelu activation is done in float32.
+        dtype=tf.float32,
         name="intermediate")
     self.output_dense = Dense2DProjection(
         output_size=self.hidden_size,
@@ -726,7 +732,7 @@ class TransformerBlock(tf.keras.layers.Layer):
         name="output")
     self.output_dropout = tf.keras.layers.Dropout(rate=self.hidden_dropout_prob)
     self.output_layer_norm = tf.keras.layers.LayerNormalization(
-        name="output_layer_norm", axis=-1, epsilon=1e-12)
+        name="output_layer_norm", axis=-1, epsilon=1e-12, dtype=tf.float32)
     super(TransformerBlock, self).build(unused_input_shapes)

   def common_layers(self):
@@ -753,6 +759,10 @@ class TransformerBlock(tf.keras.layers.Layer):
     attention_output = self.attention_dropout(attention_output)
     # Use float32 in keras layer norm and the gelu activation in the
     # intermediate dense layer for numeric stability
+    # TODO(reedwm): These casts are probably unnecessary, as we passed
+    # dtype=tf.float32 to the layer norm constructor, so it will cast its inputs
+    # to float32 automatically. These manual casts additionally do the "+"
+    # operator in float32, but "+" is numerically stable in float16.
     if self.float_type == tf.float16:
       input_tensor = tf.cast(input_tensor, tf.float32)
       attention_output = tf.cast(attention_output, tf.float32)
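For context, a minimal standalone sketch of the pattern these modeling.py changes rely on: a Keras LayerNormalization constructed with dtype=tf.float32 casts its float16 inputs to float32 and computes the normalization in float32 (as the TODO above notes). This is a sketch against the TF 2.x Keras API, not code from this repository; the tensor shapes and names are illustrative.

    import tensorflow as tf

    # Sketch only: a layer norm pinned to float32, mirroring the
    # name="layer_norm", axis=-1, epsilon=1e-12, dtype=tf.float32 call above.
    layer_norm = tf.keras.layers.LayerNormalization(
        axis=-1, epsilon=1e-12, dtype=tf.float32)

    # float16 activations, e.g. coming out of a mixed-precision block.
    x = tf.random.normal([2, 4, 8], dtype=tf.float16)

    # The layer casts x to float32 internally, so the normalization
    # statistics are computed in float32; the output is float32 and can be
    # cast back to float16 before the next float16 layer if needed.
    y = layer_norm(x)
    y = tf.cast(y, tf.float16)

This auto-casting behavior is also why the explicit tf.cast calls in TransformerBlock are flagged as probably unnecessary in the TODO.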
official/bert/run_squad.py
@@ -139,6 +139,8 @@ def predict_squad_customized(strategy, input_meta_data, bert_config,
       strategy.experimental_distribute_dataset(predict_dataset))

   with strategy.scope():
+    # Prediction always uses float32, even if training uses mixed precision.
+    tf.keras.mixed_precision.experimental.set_policy('float32')
     squad_model, _ = bert_models.squad_model(
         bert_config, input_meta_data['max_seq_length'], float_type=tf.float32)
@@ -187,7 +189,7 @@ def train_squad(strategy,
   use_float16 = common_flags.use_float16()
   if use_float16:
-    policy = tf.keras.mixed_precision.experimental.Policy('infer_float32_vars')
+    policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
     tf.keras.mixed_precision.experimental.set_policy(policy)

   bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
@@ -212,6 +214,9 @@ def train_squad(strategy,
     squad_model.optimizer = optimization.create_optimizer(
         FLAGS.learning_rate, steps_per_epoch * epochs, warmup_steps)
     if use_float16:
+      # Wraps optimizer with a LossScaleOptimizer. This is done automatically
+      # in compile() with the "mixed_float16" policy, but since we do not call
+      # compile(), we must wrap the optimizer manually.
       squad_model.optimizer = (
           tf.keras.mixed_precision.experimental.LossScaleOptimizer(
               squad_model.optimizer, loss_scale=common_flags.get_loss_scale()))
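As a rough illustration of what the run_squad.py changes set up, here is a sketch against the TF 2.1-era tf.keras.mixed_precision.experimental API (not code from this repository; the model, loss, and loss_scale value are placeholders): enable the 'mixed_float16' policy globally, then, because compile() is not called, wrap the optimizer in a LossScaleOptimizer yourself and use its scaled-loss helpers in the custom training step.

    import tensorflow as tf

    # Global policy, as in train_squad: layers compute in float16 but keep
    # float32 variables.
    policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
    tf.keras.mixed_precision.experimental.set_policy(policy)

    # Placeholder model; the final layer outputs float32 so the loss is
    # computed in float32.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu', input_shape=(8,)),
        tf.keras.layers.Dense(1, dtype=tf.float32),
    ])

    # Manual wrapping, mirroring the diff: compile() would do this under the
    # 'mixed_float16' policy, but a custom training loop must do it itself.
    optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        tf.keras.optimizers.Adam(), loss_scale='dynamic')

    @tf.function
    def train_step(x, y):
      with tf.GradientTape() as tape:
        loss = tf.reduce_mean(tf.square(model(x, training=True) - y))
        # Scale the loss so small float16 gradients do not underflow...
        scaled_loss = optimizer.get_scaled_loss(loss)
      scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
      # ...and unscale the gradients before applying them.
      grads = optimizer.get_unscaled_gradients(scaled_grads)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
      return loss

For prediction, the diff instead switches the global policy back to 'float32' inside strategy.scope(), so the SQuAD model built there runs entirely in float32.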