Commit d8a9c16e, authored Aug 03, 2020 by A. Unique TensorFlower

Merge pull request #9037 from xinliupitt:master

PiperOrigin-RevId: 324736903

Parents: 63bd9f6e, dc03c043
Showing 3 changed files with 28 additions and 8 deletions (+28 / -8):

  official/nlp/modeling/layers/transformer.py        +18  -2
  official/nlp/modeling/layers/transformer_test.py     +8  -4
  official/nlp/tasks/sentence_prediction_test.py       +2  -2
official/nlp/modeling/layers/transformer.py @ d8a9c16e
@@ -55,6 +55,7 @@ class Transformer(tf.keras.layers.Layer):
       layers. If set False, output of attention and intermediate dense layers is
       normalized.
     norm_epsilon: Epsilon value to initialize normalization layers.
+    intermediate_dropout: Dropout probability for intermediate_dropout_layer.
   """
 
   def __init__(self,
@@ -74,6 +75,7 @@ class Transformer(tf.keras.layers.Layer):
                use_bias=True,
                norm_first=False,
                norm_epsilon=1e-12,
+               intermediate_dropout=0.0,
                **kwargs):
     super(Transformer, self).__init__(**kwargs)
@@ -93,6 +95,7 @@ class Transformer(tf.keras.layers.Layer):
     self._use_bias = use_bias
     self._norm_first = norm_first
     self._norm_epsilon = norm_epsilon
+    self._intermediate_dropout = intermediate_dropout
 
   def build(self, input_shape):
     input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape
@@ -155,6 +158,8 @@ class Transformer(tf.keras.layers.Layer):
       policy = tf.float32
     self._intermediate_activation_layer = tf.keras.layers.Activation(
         self._intermediate_activation, dtype=policy)
+    self._intermediate_dropout_layer = tf.keras.layers.Dropout(
+        rate=self._intermediate_dropout)
     self._output_dense = tf.keras.layers.experimental.EinsumDense(
         "abc,cd->abd",
         output_shape=(None, hidden_size),
@@ -204,7 +209,9 @@ class Transformer(tf.keras.layers.Layer):
         "norm_first":
             self._norm_first,
         "norm_epsilon":
-            self._norm_epsilon
+            self._norm_epsilon,
+        "intermediate_dropout":
+            self._intermediate_dropout
     }
     base_config = super(Transformer, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -238,6 +245,7 @@ class Transformer(tf.keras.layers.Layer):
     intermediate_output = self._intermediate_dense(attention_output)
     intermediate_output = self._intermediate_activation_layer(
         intermediate_output)
+    intermediate_output = self._intermediate_dropout_layer(intermediate_output)
     layer_output = self._output_dense(intermediate_output)
     layer_output = self._output_dropout(layer_output)
     # During mixed precision training, attention_output is from layer norm and
@@ -291,6 +299,7 @@ class TransformerDecoderLayer(tf.keras.layers.Layer):
       layers. If set False, output of attention and intermediate dense layers is
       normalized.
     norm_epsilon: Epsilon value to initialize normalization layers.
+    intermediate_dropout: Dropout probability for intermediate_dropout_layer.
   """
 
   def __init__(self,
@@ -310,6 +319,7 @@ class TransformerDecoderLayer(tf.keras.layers.Layer):
                use_bias=True,
                norm_first=False,
                norm_epsilon=1e-12,
+               intermediate_dropout=0.0,
                **kwargs):
     super(TransformerDecoderLayer, self).__init__(**kwargs)
     self.num_attention_heads = num_attention_heads
@@ -329,6 +339,7 @@ class TransformerDecoderLayer(tf.keras.layers.Layer):
     self._use_bias = use_bias
     self._norm_first = norm_first
     self._norm_epsilon = norm_epsilon
+    self._intermediate_dropout = intermediate_dropout
     if self.multi_channel_cross_attention:
       self._cross_attention_cls = multi_channel_attention.MultiChannelAttention
     else:
@@ -401,6 +412,8 @@ class TransformerDecoderLayer(tf.keras.layers.Layer):
         **common_kwargs)
     self.intermediate_activation_layer = tf.keras.layers.Activation(
         self.intermediate_activation)
+    self._intermediate_dropout_layer = tf.keras.layers.Dropout(
+        rate=self._intermediate_dropout)
     self.output_dense = tf.keras.layers.experimental.EinsumDense(
         "abc,cd->abd",
         output_shape=(None, hidden_size),
@@ -445,7 +458,9 @@ class TransformerDecoderLayer(tf.keras.layers.Layer):
         "norm_first":
             self._norm_first,
         "norm_epsilon":
-            self._norm_epsilon
+            self._norm_epsilon,
+        "intermediate_dropout":
+            self._intermediate_dropout
     }
     base_config = super(TransformerDecoderLayer, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -508,6 +523,7 @@ class TransformerDecoderLayer(tf.keras.layers.Layer):
     intermediate_output = self.intermediate_dense(attention_output)
     intermediate_output = self.intermediate_activation_layer(
         intermediate_output)
+    intermediate_output = self._intermediate_dropout_layer(intermediate_output)
     layer_output = self.output_dense(intermediate_output)
     layer_output = self.output_dropout(layer_output)
     if self._norm_first:
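Taken together, the transformer.py hunks place the new dropout between the intermediate activation and the output projection, in both the encoder block and the decoder block. Below is a minimal standalone sketch of that feed-forward ordering in plain tf.keras; the comments name the corresponding attributes from the hunks above, while the sizes, the relu activation, and the use of Dense instead of EinsumDense are simplifying assumptions, not values from this commit.

import tensorflow as tf

# Illustrative sizes only; the real values come from the layer's constructor.
hidden_size, intermediate_size, intermediate_dropout = 16, 64, 0.1

inputs = tf.keras.Input(shape=(4, hidden_size))
x = tf.keras.layers.Dense(intermediate_size)(inputs)       # _intermediate_dense
x = tf.keras.layers.Activation("relu")(x)                   # _intermediate_activation_layer
x = tf.keras.layers.Dropout(rate=intermediate_dropout)(x)   # new _intermediate_dropout_layer
x = tf.keras.layers.Dense(hidden_size)(x)                   # _output_dense
outputs = tf.keras.layers.Dropout(rate=0.1)(x)              # _output_dropout
ffn_sketch = tf.keras.Model(inputs, outputs)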
official/nlp/modeling/layers/transformer_test.py @ d8a9c16e
@@ -230,7 +230,8 @@ class TransformerArgumentTest(keras_parameterized.TestCase):
         attention_dropout_rate=0.1,
         use_bias=False,
         norm_first=True,
-        norm_epsilon=1e-6)
+        norm_epsilon=1e-6,
+        intermediate_dropout=0.1)
     # Forward path.
     dummy_tensor = tf.zeros([2, 4, 16], dtype=tf.float32)
     dummy_mask = tf.zeros([2, 4, 4], dtype=tf.float32)
@@ -248,7 +249,8 @@ class TransformerArgumentTest(keras_parameterized.TestCase):
         attention_dropout_rate=0.1,
         use_bias=False,
         norm_first=True,
-        norm_epsilon=1e-6)
+        norm_epsilon=1e-6,
+        intermediate_dropout=0.1)
     encoder_block_config = encoder_block.get_config()
     new_encoder_block = transformer.Transformer.from_config(
         encoder_block_config)
@@ -299,7 +301,8 @@ class TransformerDecoderLayerTest(keras_parameterized.TestCase):
         attention_dropout_rate=0.1,
         use_bias=False,
         norm_first=True,
-        norm_epsilon=1e-6)
+        norm_epsilon=1e-6,
+        intermediate_dropout=0.1)
     # Forward path.
     dummy_tensor = tf.zeros([2, 4, 16], dtype=tf.float32)
     dummy_mask = tf.zeros([2, 4, 4], dtype=tf.float32)
@@ -317,7 +320,8 @@ class TransformerDecoderLayerTest(keras_parameterized.TestCase):
         attention_dropout_rate=0.1,
         use_bias=False,
         norm_first=True,
-        norm_epsilon=1e-6)
+        norm_epsilon=1e-6,
+        intermediate_dropout=0.1)
     decoder_block_config = decoder_block.get_config()
     new_decoder_block = transformer.TransformerDecoderLayer.from_config(
         decoder_block_config)
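The test hunks exercise the new argument through the public constructor and through config round-tripping. A hedged usage sketch follows; the hyperparameter values are illustrative, only intermediate_dropout is the argument this commit adds, and the import assumes the tf-models package that ships these layers is installed.

import tensorflow as tf
from official.nlp.modeling.layers import transformer

# Illustrative hyperparameters; values here are not prescribed by the commit.
encoder_block = transformer.Transformer(
    num_attention_heads=2,
    intermediate_size=32,
    intermediate_activation="relu",
    dropout_rate=0.1,
    attention_dropout_rate=0.1,
    norm_first=True,
    norm_epsilon=1e-6,
    intermediate_dropout=0.1)

# get_config() now carries intermediate_dropout, so from_config() restores it.
config = encoder_block.get_config()
restored = transformer.Transformer.from_config(config)
assert restored.get_config()["intermediate_dropout"] == 0.1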
official/nlp/tasks/sentence_prediction_test.py @ d8a9c16e
@@ -147,9 +147,9 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
     logs = task.validation_step(next(iterator), model, metrics=metrics)
     loss = logs["loss"].numpy()
     if num_classes == 1:
-      self.assertAlmostEqual(loss, 42.77483, places=3)
+      self.assertGreater(loss, 1.0)
     else:
-      self.assertAlmostEqual(loss, 3.57627e-6, places=3)
+      self.assertLess(loss, 1.0)
 
   @parameterized.parameters(("matthews_corrcoef", 2),
                             ("pearson_spearman_corr", 1))
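The sentence-prediction hunk replaces exact loss values with one-sided bound checks. A tiny standalone sketch of the two assertion styles in a tf.test.TestCase, with placeholder numbers that are not taken from the test:

import tensorflow as tf

class LossAssertionSketch(tf.test.TestCase):

  def test_bound_check(self):
    loss = 2.5  # placeholder standing in for logs["loss"].numpy()
    # Old style: pin the value to an exact constant.
    # self.assertAlmostEqual(loss, 2.5, places=3)
    # New style: only require the loss to sit on the expected side of 1.0.
    self.assertGreater(loss, 1.0)

if __name__ == "__main__":
  tf.test.main()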