ModelZoo / ResNet50_tensorflow

Commit 5571e9b6
Authored Sep 15, 2021 by Jialu Liu; committed by A. Unique TensorFlower, Sep 15, 2021

Internal change

PiperOrigin-RevId: 396831100
Parent: b3537541
Showing 11 changed files with 469 additions and 21 deletions (+469 −21):

official/nlp/projects/teams/experiments/base/glue_mnli.yaml             +39 −0
official/nlp/projects/teams/experiments/base/squad_v1.yaml              +46 −0
official/nlp/projects/teams/experiments/base/squad_v2.yaml              +47 −0
official/nlp/projects/teams/experiments/base/wiki_books_pretrain.yaml   +80 −0
official/nlp/projects/teams/experiments/small/glue_mnli.yaml            +40 −0
official/nlp/projects/teams/experiments/small/squad_v1.yaml             +46 −0
official/nlp/projects/teams/experiments/small/squad_v2.yaml             +47 −0
official/nlp/projects/teams/experiments/small/wiki_books_pretrain.yaml  +74 −0
official/nlp/projects/teams/experiments/teams_en_uncased_base.yaml      +16 −0
official/nlp/projects/teams/experiments/teams_en_uncased_small.yaml     +16 −0
official/nlp/projects/teams/teams_pretrainer.py                         +18 −21
official/nlp/projects/teams/experiments/base/glue_mnli.yaml (new file, mode 100644)

task:
  hub_module_url: ''
  model:
    num_classes: 3
  train_data:
    drop_remainder: true
    global_batch_size: 32
    input_path: ''
    is_training: true
    seq_length: 128
  validation_data:
    drop_remainder: false
    global_batch_size: 32
    input_path: ''
    is_training: false
    seq_length: 128
trainer:
  checkpoint_interval: 1000
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 61359
        end_learning_rate: 0.0
        initial_learning_rate: 3.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 6136
      type: polynomial
  steps_per_loop: 1000
  summary_interval: 1000
  # Training data size 392,702 examples, 5 epochs.
  train_steps: 61359
  validation_interval: 2000
  validation_steps: 307
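A quick sanity check: the step counts above follow directly from the dataset sizes in the comments. A plain-Python recomputation (the 9,815-example eval size is taken from the matching small config's comment, not from this file):

    import math

    train_examples, epochs, batch = 392_702, 5, 32
    train_steps = train_examples * epochs // batch   # 61359, as configured
    warmup_steps = round(0.1 * train_steps)          # 6136: 10% linear warmup
    eval_steps = math.ceil(9_815 / batch)            # 307 validation steps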
official/nlp/projects/teams/experiments/base/squad_v1.yaml (new file, mode 100644)

task:
  hub_module_url: ''
  max_answer_length: 30
  n_best_size: 20
  null_score_diff_threshold: 0.0
  train_data:
    drop_remainder: true
    global_batch_size: 32
    input_path: ''
    is_training: true
    seq_length: 384
  validation_data:
    do_lower_case: true
    doc_stride: 128
    drop_remainder: false
    global_batch_size: 32
    input_path: ''
    is_training: false
    query_length: 64
    seq_length: 384
    tokenization: WordPiece
    version_2_with_negative: false
    vocab_file: ''
trainer:
  checkpoint_interval: 500
  max_to_keep: 5
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 5549
        end_learning_rate: 0.0
        initial_learning_rate: 5.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 555
      type: polynomial
  steps_per_loop: 500
  summary_interval: 500
  train_steps: 5549
  validation_interval: 500
  validation_steps: 339
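The trainer block is the usual warmup-then-decay recipe: polynomial decay with power 1.0 (i.e. linear) from 5.0e-05 down to 0.0 over all 5,549 steps, with the first 555 steps (10%) spent in warmup. A minimal sketch of the schedule's shape, not the exact Keras schedule classes the Model Garden optimizer factory builds:

    def lr_at(step, initial_lr=5.0e-05, decay_steps=5549, warmup_steps=555, power=1.0):
      # Polynomial decay with power 1.0 is plain linear decay to 0.
      decayed = initial_lr * (1.0 - min(step, decay_steps) / decay_steps) ** power
      if step < warmup_steps:
        # Assumption: warmup ramps linearly from 0 toward the decayed value.
        return decayed * step / warmup_steps
      return decayed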
official/nlp/projects/teams/experiments/base/squad_v2.yaml (new file, mode 100644)

task:
  hub_module_url: ''
  max_answer_length: 30
  n_best_size: 20
  null_score_diff_threshold: 0.0
  train_data:
    drop_remainder: true
    global_batch_size: 32
    input_path: ''
    is_training: true
    seq_length: 384
  validation_data:
    do_lower_case: true
    doc_stride: 128
    drop_remainder: false
    global_batch_size: 32
    input_path: ''
    is_training: false
    query_length: 64
    seq_length: 384
    tokenization: WordPiece
    version_2_with_negative: true
    vocab_file: ''
trainer:
  checkpoint_interval: 500
  max_to_keep: 5
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 8160
        end_learning_rate: 0.0
        initial_learning_rate: 5.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        name: polynomial
        power: 1
        warmup_steps: 816
      type: polynomial
  steps_per_loop: 500
  summary_interval: 500
  train_steps: 8160
  validation_interval: 500
  validation_steps: 383
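Relative to the v1 config, version_2_with_negative: true marks SQuAD 2.0, where some questions have no answer, and null_score_diff_threshold is the knob from BERT-style SQuAD post-processing that decides when to emit the empty answer. A hedged sketch of that convention (this logic lives in evaluation post-processing, not in this commit):

    def final_answer(best_span_text, best_non_null_score, null_score,
                     null_score_diff_threshold=0.0):
      # Emit "no answer" when the null score beats the best span by more
      # than the threshold; 0.0 makes it a straight comparison.
      if null_score - best_non_null_score > null_score_diff_threshold:
        return ''
      return best_span_text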
official/nlp/projects/teams/experiments/base/wiki_books_pretrain.yaml (new file, mode 100644)

task:
  model:
    cls_heads: [{activation: tanh, cls_token_idx: 0, dropout_rate: 0.1,
      inner_dim: 768, name: next_sentence, num_classes: 2}]
    generator_encoder:
      bert:
        attention_dropout_rate: 0.1
        dropout_rate: 0.1
        embedding_size: 768
        hidden_activation: gelu
        hidden_size: 256
        initializer_range: 0.02
        intermediate_size: 1024
        max_position_embeddings: 512
        num_attention_heads: 4
        num_layers: 12
        type_vocab_size: 2
        vocab_size: 30522
    num_masked_tokens: 76
    sequence_length: 512
    num_classes: 2
    discriminator_encoder:
      bert:
        attention_dropout_rate: 0.1
        dropout_rate: 0.1
        embedding_size: 768
        hidden_activation: gelu
        hidden_size: 768
        initializer_range: 0.02
        intermediate_size: 3072
        max_position_embeddings: 512
        num_attention_heads: 12
        num_layers: 12
        type_vocab_size: 2
        vocab_size: 30522
    discriminator_loss_weight: 50.0
    disallow_correct: false
    tie_embeddings: true
  train_data:
    drop_remainder: true
    global_batch_size: 256
    input_path: ''
    is_training: true
    max_predictions_per_seq: 76
    seq_length: 512
    use_next_sentence_label: false
    use_position_id: false
  validation_data:
    drop_remainder: true
    global_batch_size: 256
    input_path: ''
    is_training: false
    max_predictions_per_seq: 76
    seq_length: 512
    use_next_sentence_label: false
    use_position_id: false
trainer:
  checkpoint_interval: 6000
  max_to_keep: 50
  optimizer_config:
    learning_rate:
      polynomial:
        cycle: false
        decay_steps: 1000000
        end_learning_rate: 0.0
        initial_learning_rate: 0.0002
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 10000
      type: polynomial
  steps_per_loop: 1000
  summary_interval: 1000
  train_steps: 1000000
  validation_interval: 100
  validation_steps: 64
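discriminator_loss_weight: 50.0 is the same λ ELECTRA uses to balance the discriminator's per-token binary replaced-token-detection loss against the generator's masked-LM loss (the binary loss is much smaller per token, hence the large weight). Presumably the TEAMS objective combines the head losses along these lines; the term names below are illustrative, not taken from this commit:

    def pretrain_loss(mlm_loss, rtd_loss, mws_loss, discriminator_loss_weight=50.0):
      # Generator masked-LM loss + weighted replaced-token-detection loss,
      # plus the multi-word-selection head's loss.
      return mlm_loss + discriminator_loss_weight * rtd_loss + mws_loss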
official/nlp/projects/teams/experiments/small/glue_mnli.yaml (new file, mode 100644)

task:
  hub_module_url: ''
  model:
    num_classes: 3
  train_data:
    drop_remainder: true
    global_batch_size: 32
    input_path: ''
    is_training: true
    seq_length: 128
  validation_data:
    drop_remainder: false
    global_batch_size: 32
    input_path: ''
    is_training: false
    seq_length: 128
trainer:
  checkpoint_interval: 1000
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 61359
        end_learning_rate: 0.0
        initial_learning_rate: 1.0e-04
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 6136
      type: polynomial
  steps_per_loop: 1000
  summary_interval: 1000
  # Training data size 392,702 examples, 5 epochs.
  train_steps: 61359
  validation_interval: 2000
  # Eval data size = 9815 examples.
  validation_steps: 307
official/nlp/projects/teams/experiments/small/squad_v1.yaml (new file, mode 100644)

task:
  hub_module_url: ''
  max_answer_length: 30
  n_best_size: 20
  null_score_diff_threshold: 0.0
  train_data:
    drop_remainder: true
    global_batch_size: 48
    input_path: ''
    is_training: true
    seq_length: 384
  validation_data:
    do_lower_case: true
    doc_stride: 128
    drop_remainder: false
    global_batch_size: 48
    input_path: ''
    is_training: false
    query_length: 64
    seq_length: 384
    tokenization: WordPiece
    version_2_with_negative: false
    vocab_file: ''
trainer:
  checkpoint_interval: 500
  max_to_keep: 5
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 9248
        end_learning_rate: 0.0
        initial_learning_rate: 8.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 925
      type: polynomial
  steps_per_loop: 500
  summary_interval: 500
  train_steps: 9248
  validation_interval: 500
  validation_steps: 226
official/nlp/projects/teams/experiments/small/squad_v2.yaml (new file, mode 100644)

task:
  hub_module_url: ''
  max_answer_length: 30
  n_best_size: 20
  null_score_diff_threshold: 0.0
  train_data:
    drop_remainder: true
    global_batch_size: 48
    input_path: ''
    is_training: true
    seq_length: 384
  validation_data:
    do_lower_case: true
    doc_stride: 128
    drop_remainder: false
    global_batch_size: 48
    input_path: ''
    is_training: false
    query_length: 64
    seq_length: 384
    tokenization: WordPiece
    version_2_with_negative: true
    vocab_file: ''
trainer:
  checkpoint_interval: 500
  max_to_keep: 5
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 13601
        end_learning_rate: 0.0
        initial_learning_rate: 8.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        name: polynomial
        power: 1
        warmup_steps: 1360
      type: polynomial
  steps_per_loop: 500
  summary_interval: 500
  train_steps: 13601
  validation_interval: 500
  validation_steps: 255
official/nlp/projects/teams/experiments/small/wiki_books_pretrain.yaml (new file, mode 100644)

task:
  model:
    candidate_size: 5
    num_shared_generator_hidden_layers: 3
    num_discriminator_task_agnostic_layers: 11
    tie_embeddings: true
    generator:
      attention_dropout_rate: 0.1
      dropout_rate: 0.1
      embedding_size: 128
      hidden_activation: gelu
      hidden_size: 256
      initializer_range: 0.02
      intermediate_size: 1024
      max_position_embeddings: 512
      num_attention_heads: 4
      num_layers: 6
      type_vocab_size: 2
      vocab_size: 30522
    discriminator:
      attention_dropout_rate: 0.1
      dropout_rate: 0.1
      embedding_size: 128
      hidden_activation: gelu
      hidden_size: 256
      initializer_range: 0.02
      intermediate_size: 1024
      max_position_embeddings: 512
      num_attention_heads: 4
      num_layers: 12
      type_vocab_size: 2
      vocab_size: 30522
  train_data:
    drop_remainder: true
    global_batch_size: 256
    input_path: ''
    is_training: true
    max_predictions_per_seq: 76
    seq_length: 512
    use_next_sentence_label: false
    use_position_id: false
  validation_data:
    drop_remainder: true
    global_batch_size: 256
    input_path: ''
    is_training: false
    max_predictions_per_seq: 76
    seq_length: 512
    use_next_sentence_label: false
    use_position_id: false
trainer:
  checkpoint_interval: 4000
  max_to_keep: 5
  optimizer_config:
    learning_rate:
      polynomial:
        cycle: false
        decay_steps: 500000
        end_learning_rate: 0.0
        initial_learning_rate: 0.0005
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 10000
      type: polynomial
  steps_per_loop: 4000
  summary_interval: 4000
  train_steps: 500000
  validation_interval: 100
  validation_steps: 64
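The small pretraining config carries the TEAMS-specific sharing knobs. Read together with teams_pretrainer.py, whose ReplacedTokenDetectionHead builds only the layers in range(num_task_agnostic_layers, num_hidden_instances), the implied layer budget is (an interpretation of the config, not code from this commit):

    num_layers_discriminator = 12
    num_discriminator_task_agnostic_layers = 11   # shared trunk for both heads
    rtd_specific_layers = num_layers_discriminator - num_discriminator_task_agnostic_layers  # 1
    num_layers_generator = 6
    num_shared_generator_hidden_layers = 3        # generator layers shared with the discriminator
    candidate_size = 5                            # candidate words scored per masked position
                                                  # by the multi-word selection head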
official/nlp/projects/teams/experiments/teams_en_uncased_base.yaml (new file, mode 100644)

task:
  model:
    encoder:
      bert:
        attention_dropout_rate: 0.1
        dropout_rate: 0.1
        embedding_size: 768
        hidden_activation: gelu
        hidden_size: 768
        initializer_range: 0.02
        intermediate_size: 3072
        max_position_embeddings: 512
        num_attention_heads: 12
        num_layers: 12
        type_vocab_size: 2
        vocab_size: 30522
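These are BERT-base dimensions. A back-of-the-envelope, weights-only parameter count for the encoder above (ignoring biases and LayerNorm):

    V, P, T, H, I, L = 30522, 512, 2, 768, 3072, 12
    embeddings = (V + P + T) * H            # ~23.8M (word + position + type)
    per_layer = 4 * H * H + 2 * H * I       # attention projections + FFN, ~7.1M
    total = embeddings + L * per_layer      # ~108.8M, i.e. BERT-base scale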
official/nlp/projects/teams/experiments/teams_en_uncased_small.yaml (new file, mode 100644)

task:
  model:
    encoder:
      bert:
        attention_dropout_rate: 0.1
        dropout_rate: 0.1
        embedding_size: 128
        hidden_activation: gelu
        hidden_size: 256
        initializer_range: 0.02
        intermediate_size: 1024
        max_position_embeddings: 512
        num_attention_heads: 4
        num_layers: 12
        type_vocab_size: 2
        vocab_size: 30522
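The small encoder factorizes its embeddings (embedding_size: 128 below hidden_size: 256), as in ELECTRA-Small. Assuming a single linear projection from embedding width to hidden width, the same weights-only count gives roughly:

    V, P, T, E, H, I, L = 30522, 512, 2, 128, 256, 1024, 12
    embeddings = (V + P + T) * E + E * H    # factorized embeddings + projection, ~4.0M
    per_layer = 4 * H * H + 2 * H * I       # ~0.79M
    total = embeddings + L * per_layer      # ~13.4M, ELECTRA-Small scale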
official/nlp/projects/teams/teams_pretrainer.py

@@ -47,13 +47,6 @@ class ReplacedTokenDetectionHead(tf.keras.layers.Layer):
     self.activation = self.hidden_cfg['intermediate_activation']
     self.initializer = self.hidden_cfg['kernel_initializer']
-    if output not in ('predictions', 'logits'):
-      raise ValueError(
-          ('Unknown `output` value "%s". `output` can be either "logits" or '
-           '"predictions"') % output)
-    self._output_type = output
-
-  def build(self, input_shape):
     self.hidden_layers = []
     for i in range(self.num_task_agnostic_layers, self.num_hidden_instances):
       self.hidden_layers.append(
@@ -74,6 +67,12 @@ class ReplacedTokenDetectionHead(tf.keras.layers.Layer):
         units=1, kernel_initializer=self.initializer,
         name='transform/rtd_head')
+    if output not in ('predictions', 'logits'):
+      raise ValueError(
+          ('Unknown `output` value "%s". `output` can be either "logits" or '
+           '"predictions"') % output)
+    self._output_type = output
+
   def call(self, sequence_data, input_mask):
     """Compute inner-products of hidden vectors with sampled element embeddings.
@@ -117,13 +116,6 @@ class MultiWordSelectionHead(tf.keras.layers.Layer):
     self.activation = activation
     self.initializer = tf.keras.initializers.get(initializer)
-    if output not in ('predictions', 'logits'):
-      raise ValueError(
-          ('Unknown `output` value "%s". `output` can be either "logits" or '
-           '"predictions"') % output)
-    self._output_type = output
-
-  def build(self, input_shape):
     self._vocab_size, self.embed_size = self.embedding_table.shape
     self.dense = tf.keras.layers.Dense(
         self.embed_size,
@@ -133,7 +125,11 @@ class MultiWordSelectionHead(tf.keras.layers.Layer):
     self.layer_norm = tf.keras.layers.LayerNormalization(
         axis=-1, epsilon=1e-12, name='transform/mws_layernorm')
-    super(MultiWordSelectionHead, self).build(input_shape)
+    if output not in ('predictions', 'logits'):
+      raise ValueError(
+          ('Unknown `output` value "%s". `output` can be either "logits" or '
+           '"predictions"') % output)
+    self._output_type = output

   def call(self, sequence_data, masked_positions, candidate_sets):
     """Compute inner-products of hidden vectors with sampled element embeddings.
@@ -277,27 +273,28 @@ class TeamsPretrainer(tf.keras.Model):
     self.mlm_activation = mlm_activation
     self.mlm_initializer = mlm_initializer
     self.output_type = output_type
-    embedding_table = generator_network.embedding_network.get_embedding_table()
+    self.embedding_table = (
+        self.discriminator_mws_network.embedding_network.get_embedding_table())
     self.masked_lm = layers.MaskedLM(
-        embedding_table=embedding_table,
+        embedding_table=self.embedding_table,
         activation=mlm_activation,
         initializer=mlm_initializer,
         output=output_type,
         name='generator_masked_lm')
     discriminator_cfg = self.discriminator_mws_network.get_config()
+    self.num_task_agnostic_layers = num_discriminator_task_agnostic_layers
     self.discriminator_rtd_head = ReplacedTokenDetectionHead(
         encoder_cfg=discriminator_cfg,
-        num_task_agnostic_layers=num_discriminator_task_agnostic_layers,
+        num_task_agnostic_layers=self.num_task_agnostic_layers,
         output=output_type,
         name='discriminator_rtd')
     hidden_cfg = discriminator_cfg['hidden_cfg']
     self.discriminator_mws_head = MultiWordSelectionHead(
-        embedding_table=embedding_table,
+        embedding_table=self.embedding_table,
         activation=hidden_cfg['intermediate_activation'],
         initializer=hidden_cfg['kernel_initializer'],
         output=output_type,
         name='discriminator_mws')
-    self.num_task_agnostic_layers = num_discriminator_task_agnostic_layers

   def call(self, inputs):
     """TEAMS forward pass.
@@ -380,7 +377,7 @@ class TeamsPretrainer(tf.keras.Model):
     sampled_tokens = tf.stop_gradient(
         models.electra_pretrainer.sample_from_softmax(
             mlm_logits, disallow=None))
-    sampled_tokids = tf.argmax(sampled_tokens, -1, output_type=tf.int32)
+    sampled_tokids = tf.argmax(sampled_tokens, axis=-1, output_type=tf.int32)
     # Prepares input and label for replaced token detection task.
     updated_input_ids, masked = models.electra_pretrainer.scatter_update(
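Two behavioral details in this refactor are easy to miss among the moved validation blocks: the generator's MaskedLM head and the discriminator's MultiWordSelectionHead now share the embedding table taken from the discriminator tower (self.discriminator_mws_network) rather than the generator's, and the tf.argmax call spells out its axis= keyword. The latter changes nothing, since axis is already tf.argmax's second positional parameter:

    import tensorflow as tf

    logits = tf.random.uniform([2, 8, 30522])
    before = tf.argmax(logits, -1, output_type=tf.int32)      # old call
    after = tf.argmax(logits, axis=-1, output_type=tf.int32)  # new call
    assert bool(tf.reduce_all(before == after))               # identical results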