ModelZoo / ResNet50_tensorflow

Commit 4651111e, authored Jun 04, 2020 by A. Unique TensorFlower

Merge pull request #8620 from tensorflow:revert-8619-revert-8617-xinliu

PiperOrigin-RevId: 314678046
Parents: 87292aa4, a6c0e677

Showing 3 changed files with 120 additions and 8 deletions (+120 -8):

  official/nlp/modeling/layers/position_embedding.py       +85  -0
  official/nlp/modeling/layers/position_embedding_test.py  +28  -0
  official/nlp/transformer/transformer.py                    +7  -8
official/nlp/modeling/layers/position_embedding.py  (+85 -0)

@@ -19,6 +19,8 @@ from __future__ import division
 # from __future__ import google_type_annotations
 from __future__ import print_function

+import math
+
 import tensorflow as tf

 from official.modeling import tf_utils

@@ -118,3 +120,86 @@ class PositionEmbedding(tf.keras.layers.Layer):
     position_embeddings = self._position_embeddings
     return tf.broadcast_to(position_embeddings, input_shape)
+
+
+@tf.keras.utils.register_keras_serializable(package="Text")
+class RelativePositionEmbedding(tf.keras.layers.Layer):
+  """Creates a positional embedding.
+
+  This layer calculates the position encoding as a mix of sine and cosine
+  functions with geometrically increasing wavelengths. Defined and formulated
+  in "Attention is All You Need", section 3.5
+  (https://arxiv.org/abs/1706.03762).
+
+  Arguments:
+    hidden_size: Size of the hidden layer.
+    min_timescale: Minimum scale that will be applied at each position.
+    max_timescale: Maximum scale that will be applied at each position.
+  """
+
+  def __init__(self, hidden_size, min_timescale=1.0, max_timescale=1.0e4,
+               **kwargs):
+    # We need to have a default dtype of float32, since the inputs (which Keras
+    # usually uses to infer the dtype) will always be int32.
+    # We compute the positional encoding in float32 even if the model uses
+    # float16, as many of the ops used, like log and exp, are numerically
+    # unstable in float16.
+    if "dtype" not in kwargs:
+      kwargs["dtype"] = "float32"
+
+    super(RelativePositionEmbedding, self).__init__(**kwargs)
+    self._hidden_size = hidden_size
+    self._min_timescale = min_timescale
+    self._max_timescale = max_timescale
+
+  def get_config(self):
+    config = {
+        "hidden_size": self._hidden_size,
+        "min_timescale": self._min_timescale,
+        "max_timescale": self._max_timescale,
+        "length": self._length,
+    }
+    base_config = super(RelativePositionEmbedding, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+
+  def call(self, inputs, length=None):
+    """Implements call() for the layer.
+
+    Args:
+      inputs: A tensor whose second dimension will be used as `length`. If
+        `None`, the other `length` argument must be specified.
+      length: An optional integer specifying the number of positions. If both
+        `inputs` and `length` are specified, `length` must be equal to the
+        second dimension of `inputs`.
+
+    Returns:
+      A tensor in shape of [length, hidden_size].
+    """
+    if inputs is None and length is None:
+      raise ValueError("If inputs is None, `length` must be set in "
+                       "RelativePositionEmbedding().")
+    if inputs is not None:
+      input_shape = tf_utils.get_shape_list(inputs)
+      if length is not None and length != input_shape[1]:
+        raise ValueError(
+            "If inputs is not None, `length` must equal input_shape[1].")
+      length = input_shape[1]
+    position = tf.cast(tf.range(length), tf.float32)
+    num_timescales = self._hidden_size // 2
+    min_timescale, max_timescale = self._min_timescale, self._max_timescale
+    log_timescale_increment = (
+        math.log(float(max_timescale) / float(min_timescale)) /
+        (tf.cast(num_timescales, tf.float32) - 1))
+    inv_timescales = min_timescale * tf.exp(
+        tf.cast(tf.range(num_timescales), tf.float32) *
+        -log_timescale_increment)
+    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
+        inv_timescales, 0)
+    position_embeddings = tf.concat(
+        [tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
+    return position_embeddings
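For readers skimming the diff, here is a minimal usage sketch of the new layer (not part of the commit). It assumes the official.nlp package layout shown above is importable, and the tensor sizes are made up. Note that get_config(), as committed, reads self._length, which __init__() never sets, so serializing the layer through get_config() would raise an AttributeError; the sketch below only exercises call().

import tensorflow as tf

from official.nlp.modeling.layers import position_embedding

# Build the layer for an 8-dimensional model (hypothetical size).
layer = position_embedding.RelativePositionEmbedding(hidden_size=8)

# Option 1: let the layer infer the sequence length from a
# [batch, length, hidden] tensor; only the shape of the input is used.
dummy_inputs = tf.zeros([2, 5, 8])          # batch=2, length=5, hidden=8
encoding = layer(dummy_inputs)              # shape [5, 8]

# Option 2: pass the length explicitly when no tensor is available,
# as the decoder path and the new unit test do.
encoding = layer(inputs=None, length=5)     # shape [5, 8]

# The first hidden_size // 2 channels are sines and the rest cosines, so the
# row for position 0 is [0, 0, 0, 0, 1, 1, 1, 1].
print(encoding[0])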
official/nlp/modeling/layers/position_embedding_test.py  (+28 -0)

@@ -98,6 +98,34 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     self.assertAllEqual([1, input_length, width], output_data.shape)

+  def test_relative_tensor_input(self):
+    hidden_size = 8
+    test_layer = position_embedding.RelativePositionEmbedding(
+        hidden_size=hidden_size)
+
+    # Create a 3-dimensional input for test_layer to infer length as 1.
+    input_tensor = tf.constant([[[0] * hidden_size]])
+    output_tensor = test_layer(input_tensor)
+
+    # The expected output is the theoretical result of the input based on the
+    # sine/cosine relative position embedding formula.
+    expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]])
+    self.assertAllEqual(output_tensor, expected_output_tensor)
+
+  def test_relative_length_input(self):
+    hidden_size = 8
+
+    # When no tensor is passed as input, we explicitly specify the length
+    # value when calling test_layer.
+    test_layer = position_embedding.RelativePositionEmbedding(
+        hidden_size=hidden_size)
+    input_tensor = None
+    output_tensor = test_layer(input_tensor, length=1)
+
+    # The expected output is the theoretical result of the input based on the
+    # sine/cosine relative position embedding formula.
+    expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]])
+    self.assertAllEqual(output_tensor, expected_output_tensor)
+

 if __name__ == "__main__":
   tf.test.main()
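A quick sanity check on the expected constant used in both new tests (a sketch, not part of the commit): with hidden_size=8 there are four sine channels followed by four cosine channels, and for a single position every scaled_time entry is 0, so the row is sin(0)=0 four times then cos(0)=1 four times, matching [[0, 0, 0, 0, 1, 1, 1, 1]].

import math

# hidden_size // 2 = 4 sine channels, then 4 cosine channels; position 0 gives
# scaled_time == 0 for every timescale.
row = [math.sin(0.0)] * 4 + [math.cos(0.0)] * 4
print(row)  # [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]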
official/nlp/transformer/transformer.py  (+7 -8)

@@ -22,6 +22,7 @@ from __future__ import division
 from __future__ import print_function

 import tensorflow as tf

+from official.nlp.modeling.layers import position_embedding
 from official.nlp.transformer import attention_layer
 from official.nlp.transformer import beam_search
 from official.nlp.transformer import embedding_layer

@@ -89,6 +90,8 @@ class Transformer(tf.keras.Model):
         params["vocab_size"], params["hidden_size"])
     self.encoder_stack = EncoderStack(params)
     self.decoder_stack = DecoderStack(params)
+    self.position_embedding = position_embedding.RelativePositionEmbedding(
+        hidden_size=self.params["hidden_size"])

   def get_config(self):
     return {

@@ -170,9 +173,7 @@ class Transformer(tf.keras.Model):
       attention_bias = tf.cast(attention_bias, self.params["dtype"])

       with tf.name_scope("add_pos_encoding"):
-        length = tf.shape(embedded_inputs)[1]
-        pos_encoding = model_utils.get_position_encoding(
-            length, self.params["hidden_size"])
+        pos_encoding = self.position_embedding(inputs=embedded_inputs)
         pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
         encoder_inputs = embedded_inputs + pos_encoding

@@ -209,8 +210,7 @@ class Transformer(tf.keras.Model):
           [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
       with tf.name_scope("add_pos_encoding"):
         length = tf.shape(decoder_inputs)[1]
-        pos_encoding = model_utils.get_position_encoding(
-            length, self.params["hidden_size"])
+        pos_encoding = self.position_embedding(decoder_inputs)
         pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
         decoder_inputs += pos_encoding
       if training:

@@ -232,9 +232,8 @@ class Transformer(tf.keras.Model):
   def _get_symbols_to_logits_fn(self, max_decode_length, training):
     """Returns a decoding function that calculates logits of the next tokens."""
-    timing_signal = model_utils.get_position_encoding(
-        max_decode_length + 1, self.params["hidden_size"])
+    timing_signal = self.position_embedding(
+        inputs=None, length=max_decode_length + 1)
     timing_signal = tf.cast(timing_signal, self.params["dtype"])
     decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
         max_decode_length, dtype=self.params["dtype"])
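A note on the shape contract behind the swap in the encode and decode paths above: RelativePositionEmbedding returns a [length, hidden_size] tensor, so adding it to the [batch, length, hidden_size] embeddings relies on broadcasting over the batch axis, the same contract the replaced model_utils.get_position_encoding call relied on. A minimal sketch with made-up sizes (not part of the commit):

import tensorflow as tf

from official.nlp.modeling.layers import position_embedding

batch, length, hidden = 2, 7, 16            # made-up sizes for illustration
embedded_inputs = tf.random.uniform([batch, length, hidden])

layer = position_embedding.RelativePositionEmbedding(hidden_size=hidden)
pos_encoding = layer(inputs=embedded_inputs)           # shape [length, hidden]
pos_encoding = tf.cast(pos_encoding, embedded_inputs.dtype)

# [batch, length, hidden] + [length, hidden] broadcasts over the batch axis,
# mirroring the encoder's "add_pos_encoding" block above.
encoder_inputs = embedded_inputs + pos_encoding
assert encoder_inputs.shape == (batch, length, hidden)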