ModelZoo / ResNet50_tensorflow / Commits

Commit 78c43ef1, authored Jul 26, 2021 by Gunho Park

    Merge branch 'master' of https://github.com/tensorflow/models

Parents: 67cfc95b, e3c7e300

Changes: 227. Showing 20 changed files with 707 additions and 411 deletions (+707, -411).
official/modeling/optimization/adafactor_optimizer.py               +20    -0
official/modeling/optimization/configs/learning_rate_config.py       +8    -0
official/modeling/optimization/configs/optimization_config.py        +1    -0
official/modeling/optimization/configs/optimizer_config.py          +19    -0
official/modeling/optimization/lr_schedule.py                       +69    -0
official/modeling/optimization/lr_schedule_test.py                  +35    -0
official/modeling/optimization/optimizer_factory.py                  +7    -5
official/modeling/performance.py                                    +11   -41
official/modeling/tf_utils.py                                        +1    -0
official/nlp/configs/encoders.py                                    +12    -1
official/nlp/continuous_finetune_lib.py                              +4    -2
official/nlp/data/classifier_data_lib.py                           +345  -327
official/nlp/data/classifier_data_lib_test.py                       +95    -0
official/nlp/data/create_finetuning_data.py                         +23    -3
official/nlp/data/sentence_prediction_dataloader.py                 +28   -19
official/nlp/data/sentence_prediction_dataloader_test.py            +18    -9
official/nlp/keras_nlp/encoders/bert_encoder.py                      +6    -0
official/nlp/keras_nlp/encoders/bert_encoder_test.py                 +2    -1
official/nlp/keras_nlp/layers/position_embedding_test.py             +2    -2
official/nlp/keras_nlp/layers/transformer_encoder_block.py           +1    -1
official/modeling/optimization/adafactor_optimizer.py  (new file, mode 100644)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Adafactor optimizer.
A new optimizer that will be open sourced soon.
"""
# pylint: disable=invalid-name, represents an unimplemented class definition.
Adafactor
=
"Unimplemented"
official/modeling/optimization/configs/learning_rate_config.py

...
@@ -56,10 +56,12 @@ class StepwiseLrConfig(base_config.Config):
      values[0] [boundaries[0], boundaries[1]] -> values[1]
      [boundaries[n-1], boundaries[n]] -> values[n] [boundaries[n],
      end] -> values[n+1] Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PiecewiseConstantDecay'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0


@dataclasses.dataclass
...
@@ -76,12 +78,14 @@ class ExponentialLrConfig(base_config.Config):
    decay_rate: A float. Defaults to None.
    staircase: A boolean, if true, learning rate is decreased at discrete
      intervals. Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'ExponentialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  decay_rate: Optional[float] = None
  staircase: Optional[bool] = None
  offset: int = 0


@dataclasses.dataclass
...
@@ -99,6 +103,7 @@ class PolynomialLrConfig(base_config.Config):
    power: A float. The power of the polynomial. Defaults to linear, 1.0.
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
      Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PolynomialDecay'
  initial_learning_rate: Optional[float] = None
...
@@ -106,6 +111,7 @@ class PolynomialLrConfig(base_config.Config):
  end_learning_rate: float = 0.0001
  power: float = 1.0
  cycle: bool = False
  offset: int = 0


@dataclasses.dataclass
...
@@ -122,11 +128,13 @@ class CosineLrConfig(base_config.Config):
      to None.
    alpha: A float. Minimum learning rate value as a fraction of
      initial_learning_rate.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'CosineDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  alpha: float = 0.0
  offset: int = 0


@dataclasses.dataclass
...
official/modeling/optimization/configs/optimization_config.py

...
@@ -52,6 +52,7 @@ class OptimizerConfig(oneof.OneOfConfig):
  lars: opt_cfg.LARSConfig = opt_cfg.LARSConfig()
  adagrad: opt_cfg.AdagradConfig = opt_cfg.AdagradConfig()
  slide: opt_cfg.SLIDEConfig = opt_cfg.SLIDEConfig()
  adafactor: opt_cfg.AdafactorConfig = opt_cfg.AdafactorConfig()


@dataclasses.dataclass
...
official/modeling/optimization/configs/optimizer_config.py

...
@@ -247,3 +247,22 @@ class SLIDEConfig(BaseOptimizerConfig):
  do_gradient_rescaling: bool = True
  norm_type: str = "layer"
  ratio_clip_norm: float = 1e5


@dataclasses.dataclass
class AdafactorConfig(BaseOptimizerConfig):
  """Configuration for Adafactor optimizer.

  The attributes for this class match the arguments of the Adafactor
  implementation.
  """
  name: str = "Adafactor"
  factored: bool = True
  multiply_by_parameter_scale: bool = True
  beta1: Optional[float] = None
  decay_rate: float = 0.8
  step_offset: int = 0
  clipping_threshold: float = 1.0
  min_dim_size_to_factor: int = 128
  epsilon1: float = 1e-30
  epsilon2: float = 1e-3
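
For orientation only (not part of the diff), a minimal sketch of how the new 'adafactor' oneof entry and the new 'offset' learning-rate fields could be selected through the config system; the nested dict layout and field names other than those shown in the hunks above are assumptions:

from official.modeling.optimization.configs import optimization_config as opt_config

# Hypothetical experiment fragment: pick the new optimizer and an offset cosine schedule.
config = opt_config.OptimizationConfig({
    'optimizer': {
        'type': 'adafactor',  # new oneof entry added above
        'adafactor': {'decay_rate': 0.8, 'step_offset': 0},
    },
    'learning_rate': {
        'type': 'cosine',
        'cosine': {'initial_learning_rate': 0.1,
                   'decay_steps': 1000,
                   'offset': 100},  # new field from learning_rate_config.py
    },
})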
official/modeling/optimization/lr_schedule.py

...
@@ -19,6 +19,75 @@ from typing import Mapping, Any, Union, Optional
import tensorflow as tf


def _make_offset_wrapper(new_class_name: str, base_lr_class):
  """Generates an offset wrapper of a learning rate schedule.

  It returns a subclass of `base_lr_class` that takes an `offset` argument in
  the constructor. When the new class instance is called, the behavior is:
    new_class_object(step) = base_lr_class_object(step - offset)

  Example:
    CosineDecayWithOffset = _make_offset_wrapper(
        'CosineDecayWithOffset', tf.keras.experimental.CosineDecay)
    # Use the lr:
    lr = CosineDecayWithOffset(offset=100, initial_learning_rate=0.1,
                               decay_steps=1000)
    lr(101)  # equals to tf.keras.experimental.CosineDecay(...)(101 - 100)

  Args:
    new_class_name: the name of the new class.
    base_lr_class: the base learning rate schedule class. Should be a subclass
      of tf.keras.optimizers.schedules.LearningRateSchedule.

  Returns:
    A new class (subclass of the base_lr_class) that can take an offset.
  """
  assert issubclass(base_lr_class,
                    tf.keras.optimizers.schedules.LearningRateSchedule), (
                        "base_lr_class should be subclass of keras "
                        f"LearningRateSchedule, got {base_lr_class}")

  # pylint: disable=protected-access,pointless-statement
  def offset_learning_rate_init(self, offset=0, **kwargs):
    """Constructs the learning rate schedule object.

    When this object is called, its behavior is
      self.__call__(step) == base_lr_class.__call__(step - offset)

    Args:
      self: this object.
      offset: The offset when computing the learning rate schedule.
      **kwargs: Pass through to the base learning rate class constructor.
    """
    base_lr_class.__init__(self, **kwargs)
    self._offset = offset

  def offset_learning_rate_call(self, step):
    step = tf.cast(step - self._offset, tf.float32)
    return base_lr_class.__call__(self, step)
  # pylint: enable=protected-access,pointless-statement

  return type(
      new_class_name, (base_lr_class,), {
          "base_lr_class": base_lr_class,
          "__init__": offset_learning_rate_init,
          "__call__": offset_learning_rate_call
      })


PiecewiseConstantDecayWithOffset = _make_offset_wrapper(
    "PiecewiseConstantDecayWithOffset",
    tf.keras.optimizers.schedules.PiecewiseConstantDecay)
PolynomialDecayWithOffset = _make_offset_wrapper(
    "PolynomialDecayWithOffset", tf.keras.optimizers.schedules.PolynomialDecay)
ExponentialDecayWithOffset = _make_offset_wrapper(
    "ExponentialDecayWithOffset",
    tf.keras.optimizers.schedules.ExponentialDecay)
CosineDecayWithOffset = _make_offset_wrapper(
    "CosineDecayWithOffset", tf.keras.experimental.CosineDecay)


class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Linear warmup schedule."""
...
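
As an aside (illustrative, not part of the diff), one practical use of the offset is to delay a decay schedule until after a warmup period, so the decay starts counting from the end of warmup rather than from step 0:

from official.modeling.optimization import lr_schedule

warmup_steps = 100
# Decay begins at step 100, i.e. immediately after a 100-step warmup.
decay = lr_schedule.CosineDecayWithOffset(
    offset=warmup_steps, initial_learning_rate=0.1, decay_steps=1000)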
official/modeling/optimization/lr_schedule_test.py

...
@@ -70,5 +70,40 @@ class PowerAndLinearDecayTest(tf.test.TestCase, parameterized.TestCase):
      self.assertAlmostEqual(lr(step).numpy(), value)


class OffsetLearningRateTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      dict(class_name=lr_schedule.PiecewiseConstantDecayWithOffset),
      dict(class_name=lr_schedule.PolynomialDecayWithOffset),
      dict(class_name=lr_schedule.ExponentialDecayWithOffset),
      dict(class_name=lr_schedule.CosineDecayWithOffset),
  )
  def test_generated_docstring(self, class_name):
    self.assertNotEmpty(class_name.__init__.__doc__)

  @parameterized.parameters(
      dict(
          class_name=lr_schedule.PiecewiseConstantDecayWithOffset,
          kwarg=dict(boundaries=[50, 80], values=[1.0, 0.5, 0.1])),
      dict(
          class_name=lr_schedule.PolynomialDecayWithOffset,
          kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
      dict(
          class_name=lr_schedule.ExponentialDecayWithOffset,
          kwarg=dict(
              initial_learning_rate=1.0, decay_steps=100, decay_rate=0.5)),
      dict(
          class_name=lr_schedule.CosineDecayWithOffset,
          kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
  )
  def test_offset(self, class_name, kwarg):
    offset = 10
    offset_lr = class_name(offset=offset, **kwarg)
    base_lr = class_name.base_lr_class(**kwarg)
    self.assertIsInstance(offset_lr, class_name)
    for step in range(10, 101, 10):
      self.assertEqual(offset_lr(step), base_lr(step - offset))


if __name__ == '__main__':
  tf.test.main()
official/modeling/optimization/optimizer_factory.py

...
@@ -20,6 +20,7 @@ import tensorflow as tf
 import tensorflow_addons.optimizers as tfa_optimizers
 from official.modeling.optimization import slide_optimizer
+from official.modeling.optimization import adafactor_optimizer
 from official.modeling.optimization import ema_optimizer
 from official.modeling.optimization import lars_optimizer
 from official.modeling.optimization import lr_schedule
...
@@ -34,14 +35,15 @@ OPTIMIZERS_CLS = {
     'rmsprop': tf.keras.optimizers.RMSprop,
     'lars': lars_optimizer.LARS,
     'adagrad': tf.keras.optimizers.Adagrad,
-    'slide': slide_optimizer.SLIDE
+    'slide': slide_optimizer.SLIDE,
+    'adafactor': adafactor_optimizer.Adafactor,
 }

 LR_CLS = {
-    'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
-    'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
-    'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
-    'cosine': tf.keras.experimental.CosineDecay,
+    'stepwise': lr_schedule.PiecewiseConstantDecayWithOffset,
+    'polynomial': lr_schedule.PolynomialDecayWithOffset,
+    'exponential': lr_schedule.ExponentialDecayWithOffset,
+    'cosine': lr_schedule.CosineDecayWithOffset,
     'power': lr_schedule.DirectPowerDecay,
     'power_linear': lr_schedule.PowerAndLinearDecay,
     'power_with_offset': lr_schedule.PowerDecayWithOffset,
...
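
For context, a rough sketch of how these registries are consumed (the OptimizerFactory method names are assumptions based on the surrounding library, not part of this diff):

from official.modeling.optimization import optimizer_factory

# opt_config: an OptimizationConfig whose learning_rate oneof is e.g. 'cosine'
# with the new `offset` field (see learning_rate_config.py above).
factory = optimizer_factory.OptimizerFactory(opt_config)
lr = factory.build_learning_rate()       # resolves to lr_schedule.CosineDecayWithOffset
optimizer = factory.build_optimizer(lr)  # builds the configured optimizer around lr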
official/modeling/performance.py

...
@@ -14,29 +14,16 @@
 """Functions and classes related to training performance."""

-from absl import logging
 import tensorflow as tf


 def configure_optimizer(optimizer,
                         use_float16=False,
                         use_graph_rewrite=False,
-                        loss_scale='dynamic',
-                        use_experimental_api=False):
+                        loss_scale=None):
   """Configures optimizer object with performance options."""
-  if use_experimental_api:
-    logging.warning('Passing use_experimental_api=True is deprecated. The '
-                    'argument will be removed in the future.')
   if use_float16:
-    # TODO(b/171936854): Move all methods to non-experimental api.
-    if use_experimental_api:
-      # Wraps optimizer with a LossScaleOptimizer. This is done automatically
-      # in compile() with the "mixed_float16" policy, but since we do not call
-      # compile(), we must wrap the optimizer manually.
-      optimizer = (
-          tf.keras.mixed_precision.experimental.LossScaleOptimizer(
-              optimizer, loss_scale=loss_scale))
-    elif loss_scale == 'dynamic':
+    if loss_scale in (None, 'dynamic'):
       optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
     else:
       # loss_scale is a number. We interpret that as a fixed loss scale.
...
@@ -52,34 +39,17 @@ def configure_optimizer(optimizer,
   return optimizer


-def set_mixed_precision_policy(dtype, loss_scale=None,
-                               use_experimental_api=False):
-  """Sets mix precision policy."""
-  if use_experimental_api:
-    logging.warning('Passing use_experimental_api=True is deprecated. The '
-                    'argument will be removed in the future.')
-  assert use_experimental_api or loss_scale is None, (
-      'loss_scale cannot be specified if use_experimental_api is False. If the '
-      'non-experimental API is used, specify the loss scaling configuration '
-      'when creating the LossScaleOptimizer instead.')
+def set_mixed_precision_policy(dtype, loss_scale=None):
+  """Sets the global `tf.keras.mixed_precision.Policy`."""
+  # TODO(b/191894773): Remove loss_scale argument
+  assert loss_scale is None, (
+      'The loss_scale argument must be None. The argument exists for '
+      'historical reasons and will be removed soon.')
   if dtype == tf.float16:
-    # TODO(b/171936854): Move all methods to non-experimental api.
-    if use_experimental_api:
-      policy = tf.keras.mixed_precision.experimental.Policy(
-          'mixed_float16', loss_scale=loss_scale)
-      tf.keras.mixed_precision.experimental.set_policy(policy)
-    else:
-      tf.keras.mixed_precision.set_global_policy('mixed_float16')
+    tf.keras.mixed_precision.set_global_policy('mixed_float16')
   elif dtype == tf.bfloat16:
-    if use_experimental_api:
-      tf.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
-    else:
-      tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')
+    tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')
   elif dtype == tf.float32:
-    if use_experimental_api:
-      tf.keras.mixed_precision.experimental.set_policy('float32')
-    else:
-      tf.keras.mixed_precision.set_global_policy('float32')
+    tf.keras.mixed_precision.set_global_policy('float32')
   else:
     raise ValueError('Unexpected dtype: %s' % dtype)
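
A short usage sketch of the simplified API after this change (illustrative only; the SGD optimizer is a placeholder):

import tensorflow as tf
from official.modeling import performance

# Set the global mixed-precision policy, then wrap the optimizer for float16.
performance.set_mixed_precision_policy(tf.float16)
optimizer = performance.configure_optimizer(
    tf.keras.optimizers.SGD(learning_rate=0.1),
    use_float16=True)  # wraps with tf.keras.mixed_precision.LossScaleOptimizer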
official/modeling/tf_utils.py

...
@@ -108,6 +108,7 @@ def get_activation(identifier, use_keras_layer=False):
      "linear": "linear",
      "identity": "linear",
      "swish": "swish",
      "sigmoid": "sigmoid",
      "relu6": tf.nn.relu6,
  }
  if identifier in keras_layer_allowlist:
...
official/nlp/configs/encoders.py

...
@@ -46,6 +46,8 @@ class BertEncoderConfig(hyperparams.Config):
  embedding_size: Optional[int] = None
  output_range: Optional[int] = None
  return_all_encoder_outputs: bool = False
  # Pre/Post-LN Transformer
  norm_first: bool = False


@dataclasses.dataclass
...
@@ -132,6 +134,8 @@ class BigBirdEncoderConfig(hyperparams.Config):
  intermediate_size: int = 3072
  dropout_rate: float = 0.1
  attention_dropout_rate: float = 0.1
  # Pre/Post-LN Transformer
  norm_first: bool = False
  max_position_embeddings: int = 4096
  num_rand_blocks: int = 3
  block_size: int = 64
...
@@ -152,6 +156,8 @@ class KernelEncoderConfig(hyperparams.Config):
  intermediate_size: int = 3072
  dropout_rate: float = 0.1
  attention_dropout_rate: float = 0.1
  # Pre/Post-LN Transformer
  norm_first: bool = False
  max_position_embeddings: int = 512
  type_vocab_size: int = 2
  initializer_range: float = 0.02
...
@@ -161,6 +167,7 @@ class KernelEncoderConfig(hyperparams.Config):
  redraw: bool = False
  is_short_seq: bool = False
  begin_kernel: int = 0
  scale: Optional[float] = None


@dataclasses.dataclass
...
@@ -339,6 +346,7 @@ def build_encoder(config: EncoderConfig,
          encoder_cfg.hidden_activation),
      dropout_rate=encoder_cfg.dropout_rate,
      attention_dropout_rate=encoder_cfg.attention_dropout_rate,
      norm_first=encoder_cfg.norm_first,
      kernel_initializer=tf.keras.initializers.TruncatedNormal(
          stddev=encoder_cfg.initializer_range),
      attention_cls=layers.BigBirdAttention,
...
@@ -377,6 +385,7 @@ def build_encoder(config: EncoderConfig,
      redraw=encoder_cfg.redraw,
      is_short_seq=encoder_cfg.is_short_seq,
      begin_kernel=encoder_cfg.begin_kernel,
      scale=encoder_cfg.scale,
  )
  hidden_cfg = dict(
      num_attention_heads=encoder_cfg.num_attention_heads,
...
@@ -385,6 +394,7 @@ def build_encoder(config: EncoderConfig,
          encoder_cfg.hidden_activation),
      dropout_rate=encoder_cfg.dropout_rate,
      attention_dropout_rate=encoder_cfg.attention_dropout_rate,
      norm_first=encoder_cfg.norm_first,
      kernel_initializer=tf.keras.initializers.TruncatedNormal(
          stddev=encoder_cfg.initializer_range),
      attention_cls=layers.KernelAttention,
...
@@ -445,4 +455,5 @@ def build_encoder(config: EncoderConfig,
      embedding_width=encoder_cfg.embedding_size,
      embedding_layer=embedding_layer,
      return_all_encoder_outputs=encoder_cfg.return_all_encoder_outputs,
-      dict_outputs=True)
+      dict_outputs=True,
+      norm_first=encoder_cfg.norm_first)
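
A hedged sketch of switching on the new pre-LN behavior from the config side (the EncoderConfig oneof usage and num_layers value are assumed from the surrounding library; only norm_first itself comes from this diff):

from official.nlp.configs import encoders

# Pre-LN BERT encoder: normalize the inputs to attention and intermediate layers.
cfg = encoders.EncoderConfig(
    type='bert',
    bert=encoders.BertEncoderConfig(num_layers=2, norm_first=True))
encoder = encoders.build_encoder(cfg)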
official/nlp/continuous_finetune_lib.py

...
@@ -28,7 +28,6 @@ from official.core import train_lib
from official.core import train_utils
from official.modeling import performance
from official.modeling.multitask import configs
from official.modeling.multitask import multitask
from official.modeling.multitask import train_lib as multitask_train_lib
...
@@ -167,7 +166,10 @@ def run_continuous_finetune(
    with distribution_strategy.scope():
      if isinstance(params, configs.MultiEvalExperimentConfig):
        task = task_factory.get_task(params_replaced.task)
-        eval_tasks = multitask.MultiTask.from_config(params_replaced.eval_tasks)
+        eval_tasks = [
+            task_factory.get_task(config.task_config, name=config.task_name)
+            for config in params.eval_tasks
+        ]
        (_, eval_metrics) = multitask_train_lib.run_experiment_with_multitask_eval(
            distribution_strategy=distribution_strategy,
...
official/nlp/data/classifier_data_lib.py

This diff is collapsed.
official/nlp/data/classifier_data_lib_test.py  (new file, mode 100644)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for third_party.tensorflow_models.official.nlp.data.classifier_data_lib."""

import os
import tempfile

from absl.testing import parameterized
import tensorflow as tf
import tensorflow_datasets as tfds

from official.nlp.bert import tokenization
from official.nlp.data import classifier_data_lib


def decode_record(record, name_to_features):
  """Decodes a record to a TensorFlow example."""
  return tf.io.parse_single_example(record, name_to_features)


class BertClassifierLibTest(tf.test.TestCase, parameterized.TestCase):

  def setUp(self):
    super(BertClassifierLibTest, self).setUp()
    self.model_dir = self.get_temp_dir()
    self.processors = {
        "CB": classifier_data_lib.CBProcessor,
        "SUPERGLUE-RTE": classifier_data_lib.SuperGLUERTEProcessor,
        "BOOLQ": classifier_data_lib.BoolQProcessor,
        "WIC": classifier_data_lib.WiCProcessor,
    }

    vocab_tokens = [
        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un",
        "runn", "##ing", ","
    ]
    with tempfile.NamedTemporaryFile(delete=False) as vocab_writer:
      vocab_writer.write("".join([x + "\n" for x in vocab_tokens
                                 ]).encode("utf-8"))
      vocab_file = vocab_writer.name
    self.tokenizer = tokenization.FullTokenizer(vocab_file)

  @parameterized.parameters(
      {"task_type": "CB"},
      {"task_type": "BOOLQ"},
      {"task_type": "SUPERGLUE-RTE"},
      {"task_type": "WIC"},
  )
  def test_generate_dataset_from_tfds_processor(self, task_type):
    with tfds.testing.mock_data(num_examples=5):
      output_path = os.path.join(self.model_dir, task_type)

      processor = self.processors[task_type]()

      classifier_data_lib.generate_tf_record_from_data_file(
          processor,
          None,
          self.tokenizer,
          train_data_output_path=output_path,
          eval_data_output_path=output_path,
          test_data_output_path=output_path)
      files = tf.io.gfile.glob(output_path)
      self.assertNotEmpty(files)

      train_dataset = tf.data.TFRecordDataset(output_path)
      seq_length = 128
      label_type = tf.int64
      name_to_features = {
          "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
          "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64),
          "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
          "label_ids": tf.io.FixedLenFeature([], label_type),
      }
      train_dataset = train_dataset.map(
          lambda record: decode_record(record, name_to_features))

      # If data is retrieved without error, then all requirements
      # including data type/shapes are met.
      _ = next(iter(train_dataset))


if __name__ == "__main__":
  tf.test.main()
official/nlp/data/create_finetuning_data.py

...
@@ -50,7 +50,7 @@ flags.DEFINE_enum(
    "classification_task_name", "MNLI", [
        "AX", "COLA", "IMDB", "MNLI", "MRPC", "PAWS-X", "QNLI", "QQP", "RTE",
        "SST-2", "STS-B", "WNLI", "XNLI", "XTREME-XNLI", "XTREME-PAWS-X",
-       "AX-g", "SUPERGLUE-RTE", "CB", "BoolQ"
+       "AX-g", "SUPERGLUE-RTE", "CB", "BoolQ", "WIC"
    ], "The name of the task to train BERT classifier. The "
    "difference between XTREME-XNLI and XNLI is: 1. the format "
    "of input tsv files; 2. the dev set for XTREME is english "
...
@@ -173,8 +173,26 @@ flags.DEFINE_string(
 def generate_classifier_dataset():
   """Generates classifier dataset and returns input meta data."""
-  assert (FLAGS.input_data_dir and FLAGS.classification_task_name or
-          FLAGS.tfds_params)
+  if FLAGS.classification_task_name in [
+      "COLA",
+      "WNLI",
+      "SST-2",
+      "MRPC",
+      "QQP",
+      "STS-B",
+      "MNLI",
+      "QNLI",
+      "RTE",
+      "AX",
+      "SUPERGLUE-RTE",
+      "CB",
+      "BoolQ",
+      "WIC",
+  ]:
+    assert not FLAGS.input_data_dir or FLAGS.tfds_params
+  else:
+    assert (FLAGS.input_data_dir and FLAGS.classification_task_name or
+            FLAGS.tfds_params)

   if FLAGS.tokenization == "WordPiece":
     tokenizer = tokenization.FullTokenizer(
...
@@ -248,6 +266,8 @@ def generate_classifier_dataset():
      "cb": classifier_data_lib.CBProcessor,
      "boolq": classifier_data_lib.BoolQProcessor,
      "wic": classifier_data_lib.WnliProcessor,
  }
  task_name = FLAGS.classification_task_name.lower()
  if task_name not in processors:
...
official/nlp/data/sentence_prediction_dataloader.py

...
@@ -60,8 +60,8 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
    else:
      self._label_name_mapping = dict()

-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
+  def name_to_features_spec(self):
+    """Defines features to decode. Subclass may override to append features."""
    label_type = LABEL_TYPES_MAP[self._params.label_type]
    name_to_features = {
        'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
...
@@ -72,7 +72,11 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
    if self._include_example_id:
      name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)

-    example = tf.io.parse_single_example(record, name_to_features)
+    return name_to_features
+
+  def _decode(self, record: tf.Tensor):
+    """Decodes a serialized tf.Example."""
+    example = tf.io.parse_single_example(record, self.name_to_features_spec())
    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
...
@@ -86,20 +90,23 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
  def _parse(self, record: Mapping[str, tf.Tensor]):
    """Parses raw tensors into a dict of tensors to be consumed by the model."""
-    x = {
-        'input_word_ids': record['input_ids'],
-        'input_mask': record['input_mask'],
-        'input_type_ids': record['segment_ids']
+    key_mapping = {
+        'input_ids': 'input_word_ids',
+        'input_mask': 'input_mask',
+        'segment_ids': 'input_type_ids'
    }
-    if self._include_example_id:
-      x['example_id'] = record['example_id']
-
-    x[self._label_field] = record[self._label_field]
+    ret = {}
+    for record_key in record:
+      if record_key in key_mapping:
+        ret[key_mapping[record_key]] = record[record_key]
+      else:
+        ret[record_key] = record[record_key]

    if self._label_field in self._label_name_mapping:
-      x[self._label_name_mapping[self._label_field]] = record[self._label_field]
+      ret[self._label_name_mapping[self._label_field]] = record[self._label_field]

-    return x
+    return ret

  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
    """Returns a tf.dataset.Dataset."""
...
@@ -215,13 +222,12 @@ class SentencePredictionTextDataLoader(data_loader.DataLoader):
    """Berts preprocess."""
    segments = [record[x] for x in self._text_fields]
    model_inputs = self._text_processor(segments)
-    if self._include_example_id:
-      model_inputs['example_id'] = record['example_id']
-    model_inputs[self._label_field] = record[self._label_field]
+    for key in record:
+      if key not in self._text_fields:
+        model_inputs[key] = record[key]
    return model_inputs

-  def _decode(self, record: tf.Tensor):
-    """Decodes a serialized tf.Example."""
+  def name_to_features_spec(self):
    name_to_features = {}
    for text_field in self._text_fields:
      name_to_features[text_field] = tf.io.FixedLenFeature([], tf.string)
...
@@ -230,8 +236,11 @@ class SentencePredictionTextDataLoader(data_loader.DataLoader):
      name_to_features[self._label_field] = tf.io.FixedLenFeature([], label_type)
    if self._include_example_id:
      name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)
-    example = tf.io.parse_single_example(record, name_to_features)
+    return name_to_features
+
+  def _decode(self, record: tf.Tensor):
+    """Decodes a serialized tf.Example."""
+    example = tf.io.parse_single_example(record, self.name_to_features_spec())
    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for name in example:
...
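
The net effect of the _parse rewrite is a generic rename-and-pass-through of record keys instead of a fixed whitelist. A standalone sketch of that logic (names mirror the hunk above; the record values are made up, and the dict comprehension is a simplification of the loop):

key_mapping = {'input_ids': 'input_word_ids',
               'input_mask': 'input_mask',
               'segment_ids': 'input_type_ids'}
record = {'input_ids': [1, 2], 'input_mask': [1, 1],
          'segment_ids': [0, 0], 'example_id': 7, 'label_ids': 1}

# Every key is kept; known keys are renamed to the model's input names.
ret = {key_mapping.get(k, k): v for k, v in record.items()}
# -> {'input_word_ids': [1, 2], 'input_mask': [1, 1],
#     'input_type_ids': [0, 0], 'example_id': 7, 'label_ids': 1}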
official/nlp/data/sentence_prediction_dataloader_test.py

...
@@ -198,9 +198,12 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
      dataset = loader.SentencePredictionTextDataLoader(data_config).load()
      features = next(iter(dataset))
      label_field = data_config.label_field
-      self.assertCountEqual(
-          ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
-          features.keys())
+      expected_keys = [
+          'input_word_ids', 'input_type_ids', 'input_mask', label_field
+      ]
+      if use_tfds:
+        expected_keys += ['idx']
+      self.assertCountEqual(expected_keys, features.keys())
      self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
      self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
      self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
...
@@ -233,9 +236,12 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
      dataset = loader.SentencePredictionTextDataLoader(data_config).load()
      features = next(iter(dataset))
      label_field = data_config.label_field
-      self.assertCountEqual(
-          ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
-          features.keys())
+      expected_keys = [
+          'input_word_ids', 'input_type_ids', 'input_mask', label_field
+      ]
+      if use_tfds:
+        expected_keys += ['idx']
+      self.assertCountEqual(expected_keys, features.keys())
      self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
      self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
      self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
...
@@ -268,9 +274,12 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
      dataset = loader.SentencePredictionTextDataLoader(data_config).load()
      features = next(iter(dataset))
      label_field = data_config.label_field
-      self.assertCountEqual(
-          ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
-          features.keys())
+      expected_keys = [
+          'input_word_ids', 'input_type_ids', 'input_mask', label_field
+      ]
+      if use_tfds:
+        expected_keys += ['idx']
+      self.assertCountEqual(expected_keys, features.keys())
      self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
      self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
      self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
...
official/nlp/keras_nlp/encoders/bert_encoder.py

...
@@ -69,6 +69,9 @@ class BertEncoder(tf.keras.Model):
      smaller than 'hidden_size').
    embedding_layer: An optional Layer instance which will be called to
      generate embeddings for the input word IDs.
    norm_first: Whether to normalize inputs to attention and intermediate
      dense layers. If set False, output of attention and intermediate dense
      layers is normalized.
  """

  def __init__(
...
@@ -87,6 +90,7 @@ class BertEncoder(tf.keras.Model):
      output_range=None,
      embedding_width=None,
      embedding_layer=None,
      norm_first=False,
      **kwargs):
    activation = tf.keras.activations.get(inner_activation)
    initializer = tf.keras.initializers.get(initializer)
...
@@ -162,6 +166,7 @@ class BertEncoder(tf.keras.Model):
          inner_activation=inner_activation,
          output_dropout=output_dropout,
          attention_dropout=attention_dropout,
          norm_first=norm_first,
          output_range=transformer_output_range,
          kernel_initializer=initializer,
          name='transformer/layer_%d' % i)
...
@@ -211,6 +216,7 @@ class BertEncoder(tf.keras.Model):
        'output_range': output_range,
        'embedding_width': embedding_width,
        'embedding_layer': embedding_layer,
        'norm_first': norm_first,
    }
    # We are storing the config dict as a namedtuple here to ensure checkpoint
...
official/nlp/keras_nlp/encoders/bert_encoder_test.py

...
@@ -205,7 +205,8 @@ class BertEncoderTest(keras_parameterized.TestCase):
        initializer="glorot_uniform",
        output_range=-1,
        embedding_width=16,
-        embedding_layer=None)
+        embedding_layer=None,
+        norm_first=False)
    network = bert_encoder.BertEncoder(**kwargs)

    expected_config = dict(kwargs)
    expected_config["inner_activation"] = tf.keras.activations.serialize(
...
official/nlp/keras_nlp/layers/position_embedding_test.py

...
@@ -48,12 +48,12 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
    test_layer = position_embedding.PositionEmbedding(
        max_length=sequence_length, seq_axis=2)
    width = 30
-    input_tensor = tf.keras.Input(shape=(sequence_length, width, width))
+    input_tensor = tf.keras.Input(shape=(width, sequence_length, width))
    output_tensor = test_layer(input_tensor)

    # When using static positional embedding shapes, the output is expected
    # to be the same as the input shape in all dimensions save batch.
-    expected_output_shape = [None, sequence_length, width, width]
+    expected_output_shape = [None, width, sequence_length, width]
    self.assertEqual(expected_output_shape, output_tensor.shape.as_list())
    # The default output dtype for this layer should be tf.float32.
    self.assertEqual(tf.float32, output_tensor.dtype)
...
official/nlp/keras_nlp/layers/transformer_encoder_block.py

...
@@ -249,7 +249,7 @@ class TransformerEncoderBlock(tf.keras.layers.Layer):
        attention.

    Returns:
-      An ouput tensor with the same dimensions as input/query tensor.
+      An output tensor with the same dimensions as input/query tensor.
    """
    if isinstance(inputs, (list, tuple)):
      if len(inputs) == 2:
...