ModelZoo / ResNet50_tensorflow · Commits

Commit 6e0b2ccf, authored Jan 25, 2021 by Hongkun Yu; committed by A. Unique TensorFlower, Jan 25, 2021.

Make bigbird encoder config more consistent with the encoder class.

PiperOrigin-RevId: 353723657
Parent: fb9f9ee6

Showing 3 changed files with 13 additions and 12 deletions (+13 −12):
official/nlp/configs/encoders.py                +3 −3
official/nlp/projects/bigbird/encoder.py        +8 −7
official/nlp/projects/bigbird/encoder_test.py   +2 −2
official/nlp/configs/encoders.py

@@ -136,7 +136,7 @@ class BigBirdEncoderConfig(hyperparams.Config):
   block_size: int = 64
   type_vocab_size: int = 16
   initializer_range: float = 0.02
-  embedding_size: Optional[int] = None
+  embedding_width: Optional[int] = None


 @dataclasses.dataclass
@@ -290,11 +290,11 @@ def build_encoder(config: EncoderConfig,
         attention_dropout_rate=encoder_cfg.attention_dropout_rate,
         num_rand_blocks=encoder_cfg.num_rand_blocks,
         block_size=encoder_cfg.block_size,
-        max_sequence_length=encoder_cfg.max_position_embeddings,
+        max_position_embeddings=encoder_cfg.max_position_embeddings,
         type_vocab_size=encoder_cfg.type_vocab_size,
         initializer=tf.keras.initializers.TruncatedNormal(
             stddev=encoder_cfg.initializer_range),
-        embedding_width=encoder_cfg.embedding_size)
+        embedding_width=encoder_cfg.embedding_width)

   if encoder_type == "xlnet":
     return encoder_cls(
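A minimal usage sketch of how the renamed config fields flow through `build_encoder` after this change. Only `max_position_embeddings`, `embedding_width`, and the `build_encoder` entry point are confirmed by the hunks above; the `bigbird` one-of key on `EncoderConfig` and the `vocab_size`/`num_layers` fields are assumptions based on how sibling encoder configs in this file are laid out.

```python
# Hypothetical sketch, not part of this commit. Fields other than
# max_position_embeddings and embedding_width are assumed, not shown here.
from official.nlp.configs import encoders

config = encoders.EncoderConfig(
    type='bigbird',
    bigbird=encoders.BigBirdEncoderConfig(
        vocab_size=1024,               # assumed field
        num_layers=1,                  # assumed field
        max_position_embeddings=4096,
        embedding_width=128))          # renamed from embedding_size here

# build_encoder now reads encoder_cfg.embedding_width (was .embedding_size)
# and forwards max_position_embeddings under its new keyword.
network = encoders.build_encoder(config)
```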
official/nlp/projects/bigbird/encoder.py

@@ -36,9 +36,10 @@ class BigBirdEncoder(tf.keras.Model):
     num_layers: The number of transformer layers.
     num_attention_heads: The number of attention heads for each transformer. The
       hidden size must be divisible by the number of attention heads.
-    max_sequence_length: The maximum sequence length that this encoder can
-      consume. If None, max_sequence_length uses the value from sequence length.
-      This determines the variable shape for positional embeddings.
+    max_position_embeddings: The maximum length of position embeddings that this
+      encoder can consume. If None, max_position_embeddings uses the value from
+      sequence length. This determines the variable shape for positional
+      embeddings.
     type_vocab_size: The number of types that the 'type_ids' input can take.
     intermediate_size: The intermediate size for the transformer layers.
     activation: The activation to use for the transformer layers.
@@ -58,7 +59,7 @@ class BigBirdEncoder(tf.keras.Model):
                hidden_size=768,
                num_layers=12,
                num_attention_heads=12,
-               max_sequence_length=attention.MAX_SEQ_LEN,
+               max_position_embeddings=attention.MAX_SEQ_LEN,
                type_vocab_size=16,
                intermediate_size=3072,
                block_size=64,
@@ -78,7 +79,7 @@ class BigBirdEncoder(tf.keras.Model):
         'hidden_size': hidden_size,
         'num_layers': num_layers,
         'num_attention_heads': num_attention_heads,
-        'max_sequence_length': max_sequence_length,
+        'max_position_embeddings': max_position_embeddings,
         'type_vocab_size': type_vocab_size,
         'intermediate_size': intermediate_size,
         'block_size': block_size,
@@ -109,7 +110,7 @@ class BigBirdEncoder(tf.keras.Model):
     # Always uses dynamic slicing for simplicity.
     self._position_embedding_layer = keras_nlp.layers.PositionEmbedding(
         initializer=initializer,
-        max_length=max_sequence_length,
+        max_length=max_position_embeddings,
         name='position_embedding')
     position_embeddings = self._position_embedding_layer(word_embeddings)
     self._type_embedding_layer = keras_nlp.layers.OnDeviceEmbedding(
@@ -159,7 +160,7 @@ class BigBirdEncoder(tf.keras.Model):
             from_block_size=block_size,
             to_block_size=block_size,
             num_rand_blocks=num_rand_blocks,
-            max_rand_mask_length=max_sequence_length,
+            max_rand_mask_length=max_position_embeddings,
             seed=i),
         dropout_rate=dropout_rate,
         attention_dropout_rate=dropout_rate,
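The rename is not backward compatible at the constructor: the hunks replace the `max_sequence_length` keyword outright, with no alias kept, so existing call sites must migrate. A short before/after sketch of the migration, assuming no deprecation shim was added elsewhere:

```python
from official.nlp.projects.bigbird import encoder

# Before this commit:
#   network = encoder.BigBirdEncoder(
#       num_layers=1, vocab_size=1024, max_sequence_length=4096)
# After it, the old keyword should raise TypeError (unexpected keyword
# argument). The argument still only sizes the position-embedding table
# and the random-attention mask, so inputs shorter than 4096 remain fine.
network = encoder.BigBirdEncoder(
    num_layers=1, vocab_size=1024, max_position_embeddings=4096)
```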
official/nlp/projects/bigbird/encoder_test.py

@@ -27,7 +27,7 @@ class BigBirdEncoderTest(tf.test.TestCase):
     batch_size = 2
     vocab_size = 1024
     network = encoder.BigBirdEncoder(
-        num_layers=1, vocab_size=1024, max_sequence_length=4096)
+        num_layers=1, vocab_size=1024, max_position_embeddings=4096)
     word_id_data = np.random.randint(
         vocab_size, size=(batch_size, sequence_length))
     mask_data = np.random.randint(2, size=(batch_size, sequence_length))
@@ -41,7 +41,7 @@ class BigBirdEncoderTest(tf.test.TestCase):
     batch_size = 2
     vocab_size = 1024
     network = encoder.BigBirdEncoder(
-        num_layers=1, vocab_size=1024, max_sequence_length=4096)
+        num_layers=1, vocab_size=1024, max_position_embeddings=4096)
     word_id_data = np.random.randint(
         vocab_size, size=(batch_size, sequence_length))
     mask_data = np.random.randint(2, size=(batch_size, sequence_length))
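An end-to-end sketch mirroring the updated tests. The hunks do not show the test's `sequence_length` or the model's call signature, so both are assumptions: the value below is just a multiple of the default `block_size=64`, and the three-tensor list convention follows sibling Model Garden encoders.

```python
import numpy as np
from official.nlp.projects.bigbird import encoder

batch_size = 2
vocab_size = 1024
sequence_length = 512  # assumed value; a multiple of the default block_size=64

network = encoder.BigBirdEncoder(
    num_layers=1, vocab_size=vocab_size, max_position_embeddings=4096)

word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))
mask_data = np.random.randint(2, size=(batch_size, sequence_length))
type_id_data = np.random.randint(2, size=(batch_size, sequence_length))

# Assumed call convention: [word ids, mask, type ids].
outputs = network([word_id_data, mask_data, type_id_data])
```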