ModelZoo / ResNet50_tensorflow / Commits

Commit 20897493
Authored Jun 02, 2020 by Tianqi Liu; committed by A. Unique TensorFlower on Jun 02, 2020

Internal change

PiperOrigin-RevId: 314451720
Parent: 2db2501b
Showing 6 changed files with 109 additions and 127 deletions (+109, -127):

  official/nlp/data/classifier_data_lib.py                  +85   -0
  official/nlp/data/create_finetuning_data.py               +13   -3
  official/nlp/modeling/layers/masked_softmax.py             +0   -1
  official/nlp/modeling/layers/position_embedding.py         +0  -80
  official/nlp/modeling/layers/position_embedding_test.py    +4  -32
  official/nlp/transformer/transformer.py                    +7  -11
official/nlp/data/classifier_data_lib.py

@@ -187,6 +187,91 @@ class XnliProcessor(DataProcessor):
     return "XNLI"
 
 
+class PawsxProcessor(DataProcessor):
+  """Processor for the PAWS-X data set."""
+  supported_languages = ["de", "en", "es", "fr", "ja", "ko", "zh"]
+
+  def __init__(self,
+               language="en",
+               process_text_fn=tokenization.convert_to_unicode):
+    super(PawsxProcessor, self).__init__(process_text_fn)
+    if language == "all":
+      self.languages = PawsxProcessor.supported_languages
+    elif language not in PawsxProcessor.supported_languages:
+      raise ValueError("language %s is not supported for PAWS-X task." %
+                       language)
+    else:
+      self.languages = [language]
+
+  def get_train_examples(self, data_dir):
+    """See base class."""
+    lines = []
+    for language in self.languages:
+      if language == "en":
+        train_tsv = "train.tsv"
+      else:
+        train_tsv = "translated_train.tsv"
+      # Skips the header.
+      lines.extend(
+          self._read_tsv(os.path.join(data_dir, language, train_tsv))[1:])
+
+    examples = []
+    for (i, line) in enumerate(lines):
+      guid = "train-%d" % i
+      text_a = self.process_text_fn(line[1])
+      text_b = self.process_text_fn(line[2])
+      label = self.process_text_fn(line[3])
+      examples.append(
+          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+    return examples
+
+  def get_dev_examples(self, data_dir):
+    """See base class."""
+    lines = []
+    for language in PawsxProcessor.supported_languages:
+      # Skips the header.
+      lines.extend(
+          self._read_tsv(os.path.join(data_dir, language, "dev_2k.tsv"))[1:])
+
+    examples = []
+    for (i, line) in enumerate(lines):
+      guid = "dev-%d" % i
+      text_a = self.process_text_fn(line[1])
+      text_b = self.process_text_fn(line[2])
+      label = self.process_text_fn(line[3])
+      examples.append(
+          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+    return examples
+
+  def get_test_examples(self, data_dir):
+    """See base class."""
+    examples_by_lang = {k: [] for k in PawsxProcessor.supported_languages}
+    for language in PawsxProcessor.supported_languages:
+      lines = self._read_tsv(os.path.join(data_dir, language, "test_2k.tsv"))
+      for (i, line) in enumerate(lines):
+        if i == 0:
+          continue
+        guid = "test-%d" % i
+        text_a = self.process_text_fn(line[1])
+        text_b = self.process_text_fn(line[2])
+        label = self.process_text_fn(line[3])
+        examples_by_lang[language].append(
+            InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+    return examples_by_lang
+
+  def get_labels(self):
+    """See base class."""
+    return ["0", "1"]
+
+  @staticmethod
+  def get_processor_name():
+    """See base class."""
+    return "PAWS-X"
+
+
 class MnliProcessor(DataProcessor):
   """Processor for the MultiNLI data set (GLUE version)."""
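Note: the added processor follows the repository's existing DataProcessor pattern. A rough usage sketch (the /data/paws-x path is hypothetical; the per-language directory layout holding train.tsv / translated_train.tsv, dev_2k.tsv, and test_2k.tsv is implied by the os.path.join calls above):

    from official.nlp.data import classifier_data_lib

    # "all" pools every supported language; a single code such as "de"
    # restricts training data to that language.
    processor = classifier_data_lib.PawsxProcessor(language="all")

    train = processor.get_train_examples("/data/paws-x")  # list of InputExample
    dev = processor.get_dev_examples("/data/paws-x")      # all languages pooled
    test = processor.get_test_examples("/data/paws-x")    # dict: language -> examples
    print(processor.get_labels())                         # ["0", "1"]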
official/nlp/data/create_finetuning_data.py

@@ -47,14 +47,21 @@ flags.DEFINE_string(
     "for the task.")
 
 flags.DEFINE_enum("classification_task_name", "MNLI",
-                  ["COLA", "MNLI", "MRPC", "QNLI", "QQP", "SST-2", "XNLI"],
+                  ["COLA", "MNLI", "MRPC", "QNLI", "QQP", "SST-2", "XNLI",
+                   "PAWS-X"],
                   "The name of the task to train BERT classifier.")
 
 # XNLI task specific flag.
 flags.DEFINE_string(
     "xnli_language", "en",
-    "Language of training and evaluation data for XNLI task. If the value is "
-    "'all', the data of all languages will be used for training.")
+    "Language of training data for XNLI task. If the value is 'all', the data "
+    "of all languages will be used for training.")
+
+# PAWS-X task specific flag.
+flags.DEFINE_string(
+    "pawsx_language", "en",
+    "Language of training data for PAWS-X task. If the value is 'all', the "
+    "data of all languages will be used for training.")
 
 # BERT Squad task specific flags.
 flags.DEFINE_string(

@@ -166,6 +173,9 @@ def generate_classifier_dataset():
       "xnli":
           functools.partial(classifier_data_lib.XnliProcessor,
                             language=FLAGS.xnli_language),
+      "paws-x":
+          functools.partial(classifier_data_lib.PawsxProcessor,
+                            language=FLAGS.pawsx_language)
   }
   task_name = FLAGS.classification_task_name.lower()
   if task_name not in processors:
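Note: generate_classifier_dataset() dispatches on the lowercased task name through a dict of functools.partial constructors, so wiring in PAWS-X is a single new entry. A self-contained sketch of that dispatch (the _Flags stub is a hypothetical stand-in for absl's parsed FLAGS, not part of the diff):

    import functools

    from official.nlp.data import classifier_data_lib

    class _Flags:  # hypothetical stand-in for absl.flags.FLAGS
      classification_task_name = "PAWS-X"
      xnli_language = "en"
      pawsx_language = "all"

    FLAGS = _Flags()

    processors = {
        "xnli": functools.partial(classifier_data_lib.XnliProcessor,
                                  language=FLAGS.xnli_language),
        "paws-x": functools.partial(classifier_data_lib.PawsxProcessor,
                                    language=FLAGS.pawsx_language),
    }

    task_name = FLAGS.classification_task_name.lower()
    if task_name not in processors:
      raise ValueError("Unsupported task: %s" % task_name)
    processor = processors[task_name]()  # PawsxProcessor(language="all")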
official/nlp/modeling/layers/masked_softmax.py

@@ -59,7 +59,6 @@ class MaskedSoftmax(tf.keras.layers.Layer):
       # Since we are adding it to the raw scores before the softmax, this is
       # effectively the same as removing these entirely.
       scores += adder
-
     if len(self._normalization_axes) == 1:
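Note: the context lines show the standard additive-mask trick — rather than zeroing masked positions after the softmax, a large negative value is added to their raw scores, which drives their softmax weight to ~0. A toy illustration (the -10000.0 constant and the 0/1 mask convention are assumptions for this sketch, not taken from the diff):

    import tensorflow as tf

    scores = tf.constant([[2.0, 1.0, 3.0]])
    mask = tf.constant([[1.0, 1.0, 0.0]])  # 1 = keep, 0 = mask out

    adder = (1.0 - mask) * -10000.0  # large negative addend at masked positions
    scores += adder
    print(tf.nn.softmax(scores).numpy())  # third position ~0, as if removed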
official/nlp/modeling/layers/position_embedding.py

@@ -19,8 +19,6 @@ from __future__ import division
 # from __future__ import google_type_annotations
 from __future__ import print_function
 
-import math
-
 import tensorflow as tf
 
 from official.modeling import tf_utils

@@ -120,81 +118,3 @@ class PositionEmbedding(tf.keras.layers.Layer):
       position_embeddings = self._position_embeddings
 
     return tf.broadcast_to(position_embeddings, input_shape)
-
-
-@tf.keras.utils.register_keras_serializable(package="Text")
-class RelativePositionEmbedding(tf.keras.layers.Layer):
-  """Creates a positional embedding.
-
-  This layer calculates the position encoding as a mix of sine and cosine
-  functions with geometrically increasing wavelengths. Defined and formulized
-  in "Attention is All You Need", section 3.5.
-  (https://arxiv.org/abs/1706.03762).
-
-  Arguments:
-    hidden_size: Size of the hidden layer.
-    min_timescale: Minimum scale that will be applied at each position.
-    max_timescale: Maximum scale that will be applied at each position.
-    length: Number of positions. Should be specified if `inputs` is None at
-      `call(self, inputs)`.
-  """
-
-  def __init__(self,
-               hidden_size,
-               min_timescale=1.0,
-               max_timescale=1.0e4,
-               length=None,
-               **kwargs):
-    # We need to have a default dtype of float32, since the inputs (which Keras
-    # usually uses to infer the dtype) will always be int32.
-    # We compute the positional encoding in float32 even if the model uses
-    # float16, as many of the ops used, like log and exp, are numerically
-    # unstable in float16.
-    if "dtype" not in kwargs:
-      kwargs["dtype"] = "float32"
-
-    super(RelativePositionEmbedding, self).__init__(**kwargs)
-    self._hidden_size = hidden_size
-    self._min_timescale = min_timescale
-    self._max_timescale = max_timescale
-    self._length = length
-
-  def get_config(self):
-    config = {
-        "hidden_size": self._hidden_size,
-        "min_timescale": self._min_timescale,
-        "max_timescale": self._max_timescale,
-        "length": self._length,
-    }
-    base_config = super(RelativePositionEmbedding, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  def build(self, input_shape):
-    """Implements build() for the layer."""
-    super(RelativePositionEmbedding, self).build(input_shape)
-
-  def call(self, inputs):
-    """Implements call() for the layer."""
-    length = self._length
-    if inputs is None and length is None:
-      raise ValueError("If inputs is None, `length` must be set in "
-                       "RelativePositionEmbedding().")
-    if inputs is not None:
-      input_shape = tf_utils.get_shape_list(inputs)
-      if length is not None and length != input_shape[1]:
-        raise ValueError(
-            "If inputs is not None, `length` must equal to input_shape[1].")
-      length = input_shape[1]
-    position = tf.cast(tf.range(length), tf.float32)
-    num_timescales = self._hidden_size // 2
-    min_timescale, max_timescale = self._min_timescale, self._max_timescale
-    log_timescale_increment = (
-        math.log(float(max_timescale) / float(min_timescale)) /
-        (tf.cast(num_timescales, tf.float32) - 1))
-    inv_timescales = min_timescale * tf.exp(
-        tf.cast(tf.range(num_timescales), tf.float32) *
-        -log_timescale_increment)
-    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
-        inv_timescales, 0)
-    position_embeddings = tf.concat(
-        [tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
-    return position_embeddings
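Note: the deleted layer computed the fixed sinusoidal encoding of "Attention is All You Need" section 3.5, with the sin and cos halves concatenated along the feature axis rather than interleaved: the first hidden_size/2 channels hold sin(pos * inv_timescale_i) and the second half holds the matching cos terms, with timescales spaced geometrically from min_timescale to max_timescale. A minimal NumPy sketch of the same math (a stand-in mirroring the deleted call(), not library code):

    import math
    import numpy as np

    def relative_position_encoding(length, hidden_size,
                                   min_timescale=1.0, max_timescale=1.0e4):
      # Mirrors the deleted RelativePositionEmbedding.call().
      position = np.arange(length, dtype=np.float32)
      num_timescales = hidden_size // 2
      log_timescale_increment = (
          math.log(max_timescale / min_timescale) / (num_timescales - 1))
      inv_timescales = min_timescale * np.exp(
          np.arange(num_timescales, dtype=np.float32) * -log_timescale_increment)
      scaled_time = position[:, None] * inv_timescales[None, :]
      return np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)

    # Position 0: sin(0) = 0 in the first half, cos(0) = 1 in the second, so an
    # 8-dim encoding of a length-1 sequence is [0, 0, 0, 0, 1, 1, 1, 1].
    print(relative_position_encoding(length=1, hidden_size=8))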
official/nlp/modeling/layers/position_embedding_test.py

@@ -36,7 +36,7 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     sequence_length = 21
     width = 30
     input_tensor = tf.keras.Input(shape=(sequence_length, width))
-    output_tensor = test_layer(input_tensor)  # pylint: disable=not-callable
+    output_tensor = test_layer(input_tensor)
 
     # When using static positional embedding shapes, the output is expected
     # to be the same as the input shape in all dimensions save batch.

@@ -51,7 +51,7 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     sequence_length = 21
     width = 30
     input_tensor = tf.keras.Input(shape=(sequence_length, width))
-    output_tensor = test_layer(input_tensor)  # pylint: disable=not-callable
+    output_tensor = test_layer(input_tensor)
 
     # When using static positional embedding shapes, the output is expected
     # to be the same as the input shape in all dimensions save batch.

@@ -67,7 +67,7 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     # Create a 3-dimensional input (the first dimension is implicit).
     width = 30
     input_tensor = tf.keras.Input(shape=(None, width))
-    output_tensor = test_layer(input_tensor)  # pylint: disable=not-callable
+    output_tensor = test_layer(input_tensor)
 
     # When using dynamic positional embedding shapes, the output is expected
     # to be the same as the input shape in all dimensions - but may be None if

@@ -82,7 +82,7 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     # Create a 3-dimensional input (the first dimension is implicit).
     width = 30
     input_tensor = tf.keras.Input(shape=(None, width))
-    output_tensor = test_layer(input_tensor)  # pylint: disable=not-callable
+    output_tensor = test_layer(input_tensor)
 
     model = tf.keras.Model(input_tensor, output_tensor)

@@ -98,34 +98,6 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     self.assertAllEqual([1, input_length, width], output_data.shape)
 
-  def test_relative_tensor_input(self):
-    hidden_size = 8
-    test_layer = position_embedding.RelativePositionEmbedding(
-        hidden_size=hidden_size)
-    # create a 3-dimensional input for test_layer to infer length as 1.
-    input_tensor = tf.constant([[[0] * hidden_size]])
-    output_tensor = test_layer(input_tensor)  # pylint: disable=not-callable
-    # expected output is the theoretical result of the input based on
-    # sine cosine relative position embedding formula.
-    expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]])
-    self.assertAllEqual(output_tensor, expected_output_tensor)
-
-  def test_relative_length_input(self):
-    hidden_size = 8
-    # When we do not have tensor as input, we explicitly specify length
-    # value when initializing test_layer.
-    test_layer = position_embedding.RelativePositionEmbedding(
-        hidden_size=hidden_size, length=1)
-    input_tensor = None
-    output_tensor = test_layer(input_tensor)  # pylint: disable=not-callable
-    # expected output is the theoretical result of the input based on
-    # sine cosine relative position embedding formula.
-    expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]])
-    self.assertAllEqual(output_tensor, expected_output_tensor)
-
 
 if __name__ == "__main__":
   tf.test.main()
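Note: the expected tensor in the deleted tests follows directly from the formula: at position 0 every sin channel is sin(0) = 0 and every cos channel is cos(0) = 1, hence [0, 0, 0, 0, 1, 1, 1, 1] for hidden_size=8 and a length-1 input. A quick check against the NumPy sketch above:

    enc = relative_position_encoding(length=1, hidden_size=8)
    assert np.allclose(enc, [[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]])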
official/nlp/transformer/transformer.py

@@ -22,7 +22,6 @@ from __future__ import division
 from __future__ import print_function
 
 import tensorflow as tf
 
-from official.nlp.modeling.layers import position_embedding
 from official.nlp.transformer import attention_layer
 from official.nlp.transformer import beam_search
 from official.nlp.transformer import embedding_layer

@@ -171,9 +170,9 @@ class Transformer(tf.keras.Model):
       attention_bias = tf.cast(attention_bias, self.params["dtype"])
 
       with tf.name_scope("add_pos_encoding"):
-        pos_layer = position_embedding.RelativePositionEmbedding(
-            hidden_size=self.params["hidden_size"])
-        pos_encoding = pos_layer(embedded_inputs)
+        length = tf.shape(embedded_inputs)[1]
+        pos_encoding = model_utils.get_position_encoding(
+            length, self.params["hidden_size"])
         pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
         encoder_inputs = embedded_inputs + pos_encoding

@@ -210,9 +209,8 @@ class Transformer(tf.keras.Model):
           [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
       with tf.name_scope("add_pos_encoding"):
         length = tf.shape(decoder_inputs)[1]
-        pos_layer = position_embedding.RelativePositionEmbedding(
-            hidden_size=self.params["hidden_size"])
-        pos_encoding = pos_layer(decoder_inputs)
+        pos_encoding = model_utils.get_position_encoding(
+            length, self.params["hidden_size"])
         pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
         decoder_inputs += pos_encoding
       if training:

@@ -235,10 +233,8 @@ class Transformer(tf.keras.Model):
   def _get_symbols_to_logits_fn(self, max_decode_length, training):
     """Returns a decoding function that calculates logits of the next tokens."""
-    pos_layer = position_embedding.RelativePositionEmbedding(
-        hidden_size=self.params["hidden_size"],
-        length=max_decode_length + 1)
-    timing_signal = pos_layer(None)
+    timing_signal = model_utils.get_position_encoding(
+        max_decode_length + 1, self.params["hidden_size"])
     timing_signal = tf.cast(timing_signal, self.params["dtype"])
     decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
         max_decode_length, dtype=self.params["dtype"])
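Note: all three call sites swap the Keras layer for the transformer's functional helper; both compute the same sinusoidal signal, so the refactor should be behavior-preserving. A sketch of the new call-site pattern (get_position_encoding below is a hypothetical stand-in mirroring model_utils.get_position_encoding, written out so the snippet runs standalone):

    import tensorflow as tf

    def get_position_encoding(length, hidden_size,
                              min_timescale=1.0, max_timescale=1.0e4):
      # Stand-in for model_utils.get_position_encoding: the same sin/cos
      # computation as the deleted layer, as a plain function.
      position = tf.cast(tf.range(length), tf.float32)
      num_timescales = hidden_size // 2
      log_timescale_increment = (
          tf.math.log(max_timescale / min_timescale) /
          (tf.cast(num_timescales, tf.float32) - 1))
      inv_timescales = min_timescale * tf.exp(
          tf.cast(tf.range(num_timescales), tf.float32) *
          -log_timescale_increment)
      scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
          inv_timescales, 0)
      return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)

    # Encoder/decoder call sites now pass an explicit length taken from the
    # embeddings, then add the [length, hidden] signal with broadcasting.
    embedded_inputs = tf.zeros([2, 5, 8])            # [batch, length, hidden]
    length = tf.shape(embedded_inputs)[1]
    pos_encoding = get_position_encoding(length, 8)
    encoder_inputs = embedded_inputs + pos_encoding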