OpenDAS / Fairseq · Commits

Commit 23211c45, authored May 03, 2018 by Alexei Baevski, committed by Myle Ott on Jun 15, 2018
parent d85b61d6

make attn dropout 0.1 default for big en-de transformer
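For context, the diff below leans on fairseq's decorator-based architecture registry. Here is a minimal, hypothetical sketch of that pattern (a toy stand-in, not fairseq's actual implementation, which lives in fairseq.models and also ties architecture names to model classes):

```python
# Toy sketch only: shows the decorator-registry pattern that
# @register_model_architecture in the diff relies on.
ARCH_CONFIG_REGISTRY = {}


def register_model_architecture(model_name, arch_name):
    def register(fn):
        # map the architecture name to its default-setting function
        ARCH_CONFIG_REGISTRY[arch_name] = fn
        return fn
    return register


@register_model_architecture('transformer', 'transformer_wmt_en_de_big')
def transformer_wmt_en_de_big(args):
    args.attention_dropout = 0.1


# Selecting an architecture by name then applies its defaults:
# ARCH_CONFIG_REGISTRY['transformer_wmt_en_de_big'](args)
```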
Showing 1 changed file with 11 additions and 11 deletions:

fairseq/models/transformer.py (+11, -11)
```diff
--- a/fairseq/models/transformer.py
+++ b/fairseq/models/transformer.py
@@ -423,8 +423,9 @@ def transformer_wmt_en_de(args):
     args.decoder_attention_heads = 8
 
 
-@register_model_architecture('transformer', 'transformer_wmt_en_de_big')
-def transformer_wmt_en_de_big(args):
+# parameters used in the "Attention Is All You Need" paper (Vaswani, et al, 2017)
+@register_model_architecture('transformer', 'transformer_vaswani_wmt_en_de_big')
+def transformer_vaswani_wmt_en_de_big(args):
     base_architecture(args)
     args.encoder_embed_dim = 1024
     args.encoder_ffn_embed_dim = 4096
@@ -436,18 +437,17 @@ def transformer_wmt_en_de_big(args):
     args.decoder_attention_heads = 16
 
+
+@register_model_architecture('transformer', 'transformer_wmt_en_de_big')
+def transformer_wmt_en_de_big(args):
+    transformer_vaswani_wmt_en_de_big(args)
+    args.attention_dropout = 0.1
+
+
 # default parameters used in tensor2tensor implementation
 @register_model_architecture('transformer', 'transformer_wmt_en_de_big_t2t')
 def transformer_wmt_en_de_big_t2t(args):
-    base_architecture(args)
-    args.encoder_embed_dim = 1024
-    args.encoder_ffn_embed_dim = 4096
-    args.encoder_layers = 6
-    args.encoder_attention_heads = 16
+    transformer_vaswani_wmt_en_de_big(args)
     args.encoder_normalize_before = True
-    args.decoder_embed_dim = 1024
-    args.decoder_ffn_embed_dim = 4096
-    args.decoder_layers = 6
-    args.decoder_attention_heads = 16
     args.decoder_normalize_before = True
     args.attention_dropout = 0.1
     args.relu_dropout = 0.1
```
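Read off the diff, the net effect is a small refactor plus one behavioral change: the "Attention Is All You Need" defaults move to the new name transformer_vaswani_wmt_en_de_big; transformer_wmt_en_de_big is redefined to delegate to it and override only attention_dropout = 0.1; and transformer_wmt_en_de_big_t2t now delegates as well instead of restating every field. A minimal runnable sketch of that cascade follows (bodies abbreviated; the 0.0 pre-commit attention dropout is an assumption inferred from the commit message, not shown in the diff):

```python
from argparse import Namespace


def transformer_vaswani_wmt_en_de_big(args):
    # Paper defaults, abbreviated; the real function also calls
    # base_architecture(args) and sets the remaining fields.
    args.encoder_embed_dim = 1024
    args.encoder_ffn_embed_dim = 4096
    args.attention_dropout = 0.0  # assumed pre-commit default


def transformer_wmt_en_de_big(args):
    transformer_vaswani_wmt_en_de_big(args)  # inherit everything above
    args.attention_dropout = 0.1             # the new default this commit adds


args = Namespace()
transformer_wmt_en_de_big(args)
print(args.attention_dropout)  # 0.1
```

The design choice is that each named architecture stays a strict delta on a more general one, so a change to the shared defaults propagates to every variant automatically.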