ModelZoo / ResNet50_tensorflow

Commit 1862b9c3, authored Sep 17, 2019 by Hongkun Yu, committed by A. Unique TensorFlower on Sep 17, 2019.

Move Bert to NLP. Tasks are moved to nlp/bert/.
Refactor basic utils to modeling/.

PiperOrigin-RevId: 269600561
parent ce237770

Showing 6 changed files with 32 additions and 189 deletions (+32 −189).
official/nlp/bert/tokenization_test.py       +1   −1
official/nlp/bert_modeling.py                +20  −178
official/nlp/bert_models.py                  +8   −7
official/nlp/optimization.py                 +0   −0
official/nlp/xlnet/optimization.py           +2   −2
official/transformer/v2/attention_layer.py   +1   −1
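Net effect for downstream code: the BERT model definitions move from official/bert/ into official/nlp/ (modeling.py becomes bert_modeling.py), and the generic tensor helpers (pack_inputs, unpack_inputs, get_shape_list, get_activation, and friends) are now referenced through official.modeling.tf_utils. A minimal sketch of how a caller adapts, assuming the tf-models code at this revision is on the Python path; the tensor is illustrative:

import tensorflow as tf

# Old spelling (before this commit):
#   from official.bert import modeling
#   shape = modeling.get_shape_list(tensor)

# New spelling (after this commit):
from official.modeling import tf_utils
from official.nlp import bert_modeling as modeling  # model classes, e.g. BertConfig

tensor = tf.zeros([2, 3, 4])
shape = tf_utils.get_shape_list(tensor, expected_rank=3)  # [2, 3, 4]
activation = tf_utils.get_activation("gelu")              # helper also moved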
official/bert/tokenization_test.py → official/nlp/bert/tokenization_test.py

@@ -22,7 +22,7 @@ import tempfile
 import six
 import tensorflow as tf

-from official.bert import tokenization
+from official.nlp.bert import tokenization


 class TokenizationTest(tf.test.TestCase):
official/bert/modeling.py → official/nlp/bert_modeling.py
@@ -24,6 +24,8 @@ import math
 import six
 import tensorflow as tf

+from official.modeling import tf_utils
+

 class BertConfig(object):
   """Configuration for `BertModel`."""
@@ -191,7 +193,7 @@ class BertModel(tf.keras.layers.Layer):
                input_mask=None,
                input_type_ids=None,
                **kwargs):
-    inputs = pack_inputs([input_word_ids, input_mask, input_type_ids])
+    inputs = tf_utils.pack_inputs([input_word_ids, input_mask, input_type_ids])
     return super(BertModel, self).__call__(inputs, **kwargs)

   def call(self, inputs, mode="bert"):
@@ -205,7 +207,7 @@ class BertModel(tf.keras.layers.Layer):
       is a float Tensor of shape [batch_size, seq_length, hidden_size] or
       a list of output tensors for encoder usage (mode=`encoder`).
     """
-    unpacked_inputs = unpack_inputs(inputs)
+    unpacked_inputs = tf_utils.unpack_inputs(inputs)
     input_word_ids = unpacked_inputs[0]
     input_mask = unpacked_inputs[1]
     input_type_ids = unpacked_inputs[2]
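The layers keep their existing convention of packing optional keyword arguments into a tuple in __call__ and unpacking them again in call(); only the helpers' home changes to tf_utils. A small round-trip sketch of that convention, based on the pack_inputs/unpack_inputs implementations removed further down in this file (the tensors are made up):

import tensorflow as tf
from official.modeling import tf_utils  # location assumed per this commit

word_ids = tf.constant([[101, 2023, 102]])
mask = None  # optional inputs may be None

# pack_inputs replaces None with a scalar int32 sentinel so only tensors
# cross the Keras __call__ boundary.
packed = tf_utils.pack_inputs([word_ids, mask])

# unpack_inputs detects the sentinel and turns it back into None.
word_ids_out, mask_out = tf_utils.unpack_inputs(packed)
assert mask_out is None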
@@ -260,7 +262,7 @@ class EmbeddingLookup(tf.keras.layers.Layer):

   def call(self, inputs):
     """Implements call() for the layer."""
-    input_shape = get_shape_list(inputs)
+    input_shape = tf_utils.get_shape_list(inputs)
     flat_input = tf.reshape(inputs, [-1])
     output = tf.gather(self.embeddings, flat_input)
     output = tf.reshape(output, input_shape + [self.embedding_size])
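The lookup logic itself is untouched: flatten the ids, gather rows from the embedding table, and restore the leading dimensions reported by the shape helper. A standalone sketch of that gather-and-reshape pattern with made-up sizes (plain TensorFlow ops only; the real layer uses tf_utils.get_shape_list so dynamic batch dimensions also work):

import tensorflow as tf

embedding_size = 8
embeddings = tf.random.normal([100, embedding_size])  # stand-in embedding table
ids = tf.constant([[3, 7, 7], [0, 1, 2]])              # [batch_size, seq_length]

input_shape = ids.shape.as_list()                       # [2, 3]
flat_input = tf.reshape(ids, [-1])                      # [6]
output = tf.gather(embeddings, flat_input)              # [6, 8]
output = tf.reshape(output, input_shape + [embedding_size])
print(output.shape)                                     # (2, 3, 8)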
@@ -323,15 +325,15 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
     super(EmbeddingPostprocessor, self).build(input_shapes)

   def __call__(self, word_embeddings, token_type_ids=None, **kwargs):
-    inputs = pack_inputs([word_embeddings, token_type_ids])
+    inputs = tf_utils.pack_inputs([word_embeddings, token_type_ids])
     return super(EmbeddingPostprocessor, self).__call__(inputs, **kwargs)

   def call(self, inputs):
     """Implements call() for the layer."""
-    unpacked_inputs = unpack_inputs(inputs)
+    unpacked_inputs = tf_utils.unpack_inputs(inputs)
     word_embeddings = unpacked_inputs[0]
     token_type_ids = unpacked_inputs[1]
-    input_shape = get_shape_list(word_embeddings, expected_rank=3)
+    input_shape = tf_utils.get_shape_list(word_embeddings, expected_rank=3)
     batch_size = input_shape[0]
     seq_length = input_shape[1]
     width = input_shape[2]
@@ -429,12 +431,12 @@ class Attention(tf.keras.layers.Layer):
     return output_tensor

   def __call__(self, from_tensor, to_tensor, attention_mask=None, **kwargs):
-    inputs = pack_inputs([from_tensor, to_tensor, attention_mask])
+    inputs = tf_utils.pack_inputs([from_tensor, to_tensor, attention_mask])
     return super(Attention, self).__call__(inputs, **kwargs)

   def call(self, inputs):
     """Implements call() for the layer."""
-    (from_tensor, to_tensor, attention_mask) = unpack_inputs(inputs)
+    (from_tensor, to_tensor, attention_mask) = tf_utils.unpack_inputs(inputs)

     # Scalar dimensions referenced here:
     #   B = batch size (number of sequences)
@@ -707,7 +709,8 @@ class TransformerBlock(tf.keras.layers.Layer):
     self.hidden_size = hidden_size
     self.num_attention_heads = num_attention_heads
     self.intermediate_size = intermediate_size
-    self.intermediate_activation = get_activation(intermediate_activation)
+    self.intermediate_activation = tf_utils.get_activation(
+        intermediate_activation)
     self.hidden_dropout_prob = hidden_dropout_prob
     self.attention_probs_dropout_prob = attention_probs_dropout_prob
     self.initializer_range = initializer_range
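From here on, string activation names are resolved through tf_utils.get_activation; the original get_activation and gelu implementations are deleted further down in this file. A hedged sanity check of the lookup after the move (it assumes the moved copy keeps the same tanh-based GELU approximation that the deleted code used):

import math
import tensorflow as tf
from official.modeling import tf_utils  # assumed new home of the helpers

gelu = tf_utils.get_activation("gelu")  # name-to-function lookup, as before

x = tf.constant([[-1.0, 0.0, 2.0]])
y = gelu(x)

# The approximation from the deleted gelu() in this file, written out directly.
expected = x * 0.5 * (1.0 + tf.tanh(
    math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3))))
print(float(tf.reduce_max(tf.abs(y - expected))))  # expect ~0.0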
@@ -769,12 +772,12 @@ class TransformerBlock(tf.keras.layers.Layer):
     ]

   def __call__(self, input_tensor, attention_mask=None):
-    inputs = pack_inputs([input_tensor, attention_mask])
+    inputs = tf_utils.pack_inputs([input_tensor, attention_mask])
     return super(TransformerBlock, self).__call__(inputs)

   def call(self, inputs):
     """Implements call() for the layer."""
-    (input_tensor, attention_mask) = unpack_inputs(inputs)
+    (input_tensor, attention_mask) = tf_utils.unpack_inputs(inputs)
     attention_output = self.attention_layer(
         from_tensor=input_tensor,
         to_tensor=input_tensor,
@@ -835,7 +838,8 @@ class Transformer(tf.keras.layers.Layer):
     self.hidden_size = hidden_size
     self.num_attention_heads = num_attention_heads
     self.intermediate_size = intermediate_size
-    self.intermediate_activation = get_activation(intermediate_activation)
+    self.intermediate_activation = tf_utils.get_activation(
+        intermediate_activation)
     self.hidden_dropout_prob = hidden_dropout_prob
     self.attention_probs_dropout_prob = attention_probs_dropout_prob
     self.initializer_range = initializer_range
@@ -861,7 +865,7 @@ class Transformer(tf.keras.layers.Layer):
     super(Transformer, self).build(unused_input_shapes)

   def __call__(self, input_tensor, attention_mask=None, **kwargs):
-    inputs = pack_inputs([input_tensor, attention_mask])
+    inputs = tf_utils.pack_inputs([input_tensor, attention_mask])
     return super(Transformer, self).__call__(inputs=inputs, **kwargs)

   def call(self, inputs, return_all_layers=False):
@@ -874,7 +878,7 @@ class Transformer(tf.keras.layers.Layer):
     Returns:
       Output tensor of the last layer or a list of output tensors.
     """
-    unpacked_inputs = unpack_inputs(inputs)
+    unpacked_inputs = tf_utils.unpack_inputs(inputs)
     input_tensor = unpacked_inputs[0]
     attention_mask = unpacked_inputs[1]
     output_tensor = input_tensor
@@ -890,108 +894,6 @@ class Transformer(tf.keras.layers.Layer):
     return all_layer_outputs[-1]


-def pack_inputs(inputs):
-  """Pack a list of `inputs` tensors to a tuple.
-  Args:
-    inputs: a list of tensors.
-  Returns:
-    a tuple of tensors. if any input is None, replace it with a special constant
-    tensor.
-  """
-  inputs = tf.nest.flatten(inputs)
-  outputs = []
-  for x in inputs:
-    if x is None:
-      outputs.append(tf.constant(0, shape=[], dtype=tf.int32))
-    else:
-      outputs.append(x)
-  return tuple(outputs)
-
-
-def unpack_inputs(inputs):
-  """unpack a tuple of `inputs` tensors to a tuple.
-  Args:
-    inputs: a list of tensors.
-  Returns:
-    a tuple of tensors. if any input is a special constant tensor, replace it
-    with None.
-  """
-  inputs = tf.nest.flatten(inputs)
-  outputs = []
-  for x in inputs:
-    if is_special_none_tensor(x):
-      outputs.append(None)
-    else:
-      outputs.append(x)
-  x = tuple(outputs)
-
-  # To trick the very pointless 'unbalanced-tuple-unpacking' pylint check
-  # from triggering.
-  if len(x) == 1:
-    return x[0]
-  return tuple(outputs)
-
-
-def is_special_none_tensor(tensor):
-  """Checks if a tensor is a special None Tensor."""
-  return tensor.shape.ndims == 0 and tensor.dtype == tf.int32
-
-
-def gelu(x):
-  """Gaussian Error Linear Unit.
-  This is a smoother version of the RELU.
-  Original paper: https://arxiv.org/abs/1606.08415
-  Args:
-    x: float Tensor to perform activation.
-  Returns:
-    `x` with the GELU activation applied.
-  """
-  cdf = 0.5 * (1.0 + tf.tanh(
-      (math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3)))))
-  return x * cdf
-
-
-def get_activation(identifier):
-  """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.
-  Args:
-    identifier: String name of the activation function.
-  Returns:
-    A Python function corresponding to the activation function. If
-    `identifier` is None, empty, or "linear", this will return None.
-    If `identifier` is not a string, it will return `identifier`.
-  Raises:
-    ValueError: The `identifier` does not correspond to a known
-      activation.
-  """
-  if identifier is None:
-    return None
-  elif isinstance(identifier, six.string_types):
-    name_to_fn = {
-        "linear": None,
-        "relu": tf.nn.relu,
-        "gelu": gelu,
-        "tanh": tf.nn.tanh,
-    }
-    identifier = str(identifier).lower()
-    if identifier not in name_to_fn:
-      raise ValueError("Unsupported activation function: %s" % (identifier))
-    return name_to_fn[identifier]
-  elif callable(identifier):
-    return identifier
-  else:
-    raise ValueError("Could not interpret activation "
-                     "function identifier: %s" % (identifier))
-
-
 def get_initializer(initializer_range=0.02):
   """Creates a `tf.initializers.truncated_normal` with the given range.
@@ -1004,66 +906,6 @@ def get_initializer(initializer_range=0.02):
   return tf.keras.initializers.TruncatedNormal(stddev=initializer_range)


-def get_shape_list(tensor, expected_rank=None, name=None):
-  """Returns a list of the shape of tensor, preferring static dimensions.
-  Args:
-    tensor: A tf.Tensor object to find the shape of.
-    expected_rank: (optional) int. The expected rank of `tensor`. If this is
-      specified and the `tensor` has a different rank, and exception will be
-      thrown.
-    name: Optional name of the tensor for the error message.
-  Returns:
-    A list of dimensions of the shape of tensor. All static dimensions will
-    be returned as python integers, and dynamic dimensions will be returned
-    as tf.Tensor scalars.
-  """
-  if expected_rank is not None:
-    assert_rank(tensor, expected_rank, name)
-
-  shape = tensor.shape.as_list()
-
-  non_static_indexes = []
-  for (index, dim) in enumerate(shape):
-    if dim is None:
-      non_static_indexes.append(index)
-
-  if not non_static_indexes:
-    return shape
-
-  dyn_shape = tf.shape(tensor)
-  for index in non_static_indexes:
-    shape[index] = dyn_shape[index]
-  return shape
-
-
-def assert_rank(tensor, expected_rank, name=None):
-  """Raises an exception if the tensor rank is not of the expected rank.
-  Args:
-    tensor: A tf.Tensor to check the rank of.
-    expected_rank: Python integer or list of integers, expected rank.
-    name: Optional name of the tensor for the error message.
-  Raises:
-    ValueError: If the expected shape doesn't match the actual shape.
-  """
-  expected_rank_dict = {}
-  if isinstance(expected_rank, six.integer_types):
-    expected_rank_dict[expected_rank] = True
-  else:
-    for x in expected_rank:
-      expected_rank_dict[x] = True
-
-  actual_rank = tensor.shape.ndims
-  if actual_rank not in expected_rank_dict:
-    raise ValueError(
-        "For the tensor `%s`, the actual tensor rank `%d` (shape = %s) is not "
-        "equal to the expected tensor rank `%s`" %
-        (name, actual_rank, str(tensor.shape), str(expected_rank)))
-
-
 def create_attention_mask_from_input_mask(from_tensor, to_mask):
   """Create 3D attention mask from a 2D tensor mask.
@@ -1074,11 +916,11 @@ def create_attention_mask_from_input_mask(from_tensor, to_mask):
   Returns:
     float Tensor of shape [batch_size, from_seq_length, to_seq_length].
   """
-  from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
+  from_shape = tf_utils.get_shape_list(from_tensor, expected_rank=[2, 3])
   batch_size = from_shape[0]
   from_seq_length = from_shape[1]

-  to_shape = get_shape_list(to_mask, expected_rank=2)
+  to_shape = tf_utils.get_shape_list(to_mask, expected_rank=2)
   to_seq_length = to_shape[1]

   to_mask = tf.cast(
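get_shape_list and assert_rank move to tf_utils as well, as the call sites in the hunk above show. A short sketch of what the helper returns for a partially known shape, based on the implementation deleted above (the input signature is made up, and the tf_utils location is assumed per this commit):

import tensorflow as tf
from official.modeling import tf_utils

@tf.function(input_signature=[tf.TensorSpec(shape=[None, 128], dtype=tf.int32)])
def flatten_ids(ids):
  # Static dimensions come back as Python ints, unknown ones as scalar
  # tensors, so the result can be fed straight to tf.reshape.
  shape = tf_utils.get_shape_list(ids, expected_rank=2, name='ids')
  batch_size, seq_length = shape[0], shape[1]
  return tf.reshape(ids, [batch_size * seq_length])

print(flatten_ids(tf.zeros([4, 128], tf.int32)).shape)  # (512,)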
official/bert/bert_models.py → official/nlp/bert_models.py
@@ -21,7 +21,8 @@ from __future__ import print_function
 import copy
 import tensorflow as tf

-from official.bert import modeling
+from official.modeling import tf_utils
+from official.nlp import bert_modeling as modeling


 def gather_indexes(sequence_tensor, positions):
@@ -40,7 +41,7 @@ def gather_indexes(sequence_tensor, positions):
     Masked out sequence tensor of shape (batch_size * max_predictions_per_seq,
     num_hidden).
   """
-  sequence_shape = modeling.get_shape_list(
+  sequence_shape = tf_utils.get_shape_list(
       sequence_tensor, name='sequence_output_tensor')
   batch_size = sequence_shape[0]
   seq_length = sequence_shape[1]
@@ -92,7 +93,7 @@ class BertPretrainLayer(tf.keras.layers.Layer):
         initializer=tf.keras.initializers.Zeros())
     self.lm_dense = tf.keras.layers.Dense(
         self.config.hidden_size,
-        activation=modeling.get_activation(self.config.hidden_act),
+        activation=tf_utils.get_activation(self.config.hidden_act),
         kernel_initializer=self.initializer,
         name='predictions/transform/dense')
     self.lm_layer_norm = tf.keras.layers.LayerNormalization(
@@ -115,13 +116,13 @@ class BertPretrainLayer(tf.keras.layers.Layer):
                pooled_output,
                sequence_output=None,
                masked_lm_positions=None):
-    inputs = modeling.pack_inputs(
+    inputs = tf_utils.pack_inputs(
         [pooled_output, sequence_output, masked_lm_positions])
     return super(BertPretrainLayer, self).__call__(inputs)

   def call(self, inputs):
     """Implements call() for the layer."""
-    unpacked_inputs = modeling.unpack_inputs(inputs)
+    unpacked_inputs = tf_utils.unpack_inputs(inputs)
     pooled_output = unpacked_inputs[0]
     sequence_output = unpacked_inputs[1]
     masked_lm_positions = unpacked_inputs[2]
@@ -153,7 +154,7 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
                lm_label_ids=None,
                lm_label_weights=None,
                sentence_labels=None):
-    inputs = modeling.pack_inputs([
+    inputs = tf_utils.pack_inputs([
         lm_output, sentence_output, lm_label_ids, lm_label_weights,
         sentence_labels
     ])
@@ -186,7 +187,7 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):

   def call(self, inputs):
     """Implements call() for the layer."""
-    unpacked_inputs = modeling.unpack_inputs(inputs)
+    unpacked_inputs = tf_utils.unpack_inputs(inputs)
     lm_output = unpacked_inputs[0]
     sentence_output = unpacked_inputs[1]
     lm_label_ids = unpacked_inputs[2]
official/bert/optimization.py → official/nlp/optimization.py

File moved (no content changes).
official/nlp/xlnet/optimization.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 from absl import logging
 import tensorflow as tf

-from official.bert.optimization import AdamWeightDecay
+from official.nlp import optimization


 class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
@@ -86,7 +86,7 @@ def create_optimizer(init_lr,
   logging.info(
       "Using AdamWeightDecay with adam_epsilon=%.9f weight_decay_rate=%.3f",
       adam_epsilon, weight_decay_rate)
-  optimizer = AdamWeightDecay(
+  optimizer = optimization.AdamWeightDecay(
       learning_rate=learning_rate_fn,
       weight_decay_rate=weight_decay_rate,
       beta_1=0.9,
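The XLNet code now imports the shared optimization module (moved above from official/bert/) instead of the AdamWeightDecay symbol directly, so the constructor is spelled through the module. A minimal usage sketch; only the module path, class name, and argument names visible in this hunk are taken from the diff, the values are illustrative:

from official.nlp import optimization  # the module moved in this commit

optimizer = optimization.AdamWeightDecay(
    learning_rate=1e-4,      # stands in for the warmup/decay schedule
    weight_decay_rate=0.01,  # illustrative value
    beta_1=0.9)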
official/transformer/v2/attention_layer.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function

 import tensorflow as tf

-from official.bert import modeling as common_layer
+from official.nlp import bert_modeling as common_layer

 class Attention(tf.keras.layers.Layer):