OpenDAS / Fairseq

Commit ac5fddfc, authored Jul 11, 2018 by Mehdi Drissi, committed by Myle Ott on Jul 11, 2018
Parent: f26b6aff

Fix up model defaults (#211)
Showing 4 changed files with 52 additions and 30 deletions:
fairseq/models/fconv.py            +8   -6
fairseq/models/fconv_self_att.py   +27  -15
fairseq/models/lstm.py             +5   -3
fairseq/models/transformer.py      +12  -6
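All four files follow the same pattern: hard-coded default= values are dropped from the add_argument(...) calls in each model's add_args, and the fallback values move into the base_architecture-style functions as getattr(args, ..., default) lines. The motivation is how fairseq resolves options: model-specific arguments are added in a group that suppresses unspecified values (argparse.SUPPRESS, set up in options.parse_args_and_arch), so a flag the user does not pass never appears on args at all, and the architecture function registered for --arch fills it in. With an argparse default the attribute is always present, so a named architecture can never override it. Below is a minimal, self-contained sketch of the two behaviours with a hypothetical --dropout flag; it is an illustration of the mechanism, not fairseq code.

import argparse

def base_architecture(args):
    # architecture-level default: only applies if the attribute is missing
    args.dropout = getattr(args, 'dropout', 0.1)

# Old style: the argparse default means args.dropout always exists, so the
# architecture function can never change it.
p_old = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
p_old.add_argument('--dropout', default=0.2, type=float)
args_old = p_old.parse_args([])           # user passed nothing
base_architecture(args_old)
print(args_old.dropout)                   # 0.2 -- getattr fallback never used

# New style: no argparse default, so an unspecified flag is simply absent
# (because of SUPPRESS) and the architecture function supplies the value.
p_new = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
p_new.add_argument('--dropout', type=float)
args_new = p_new.parse_args([])
base_architecture(args_new)
print(args_new.dropout)                   # 0.1 -- architecture default wins

# An explicit command-line value still takes precedence either way.
args_cli = p_new.parse_args(['--dropout', '0.3'])
base_architecture(args_cli)
print(args_cli.dropout)                   # 0.3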
fairseq/models/fconv.py

@@ -31,17 +31,17 @@ class FConvModel(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--encoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained encoder embedding')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
@@ -49,7 +49,7 @@ class FConvModel(FairseqModel):
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--normalization-constant', type=float, default=0.5, metavar='D',
+        parser.add_argument('--normalization-constant', type=float, metavar='D',
                             help='multiplies the result of the residual block by sqrt(value)')
         parser.add_argument('--share-input-output-embed', action='store_true',
                             help='share input and output embeddings (requires'
@@ -104,7 +104,7 @@ class FConvLanguageModel(FairseqLanguageModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
@@ -117,7 +117,7 @@ class FConvLanguageModel(FairseqLanguageModel):
                                  'Must be used with adaptive_loss criterion')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--normalization-constant', type=float, default=0.5, metavar='D',
+        parser.add_argument('--normalization-constant', type=float, metavar='D',
                             help='multiplies the result of the residual block by sqrt(value)')

     @classmethod
@@ -611,6 +611,7 @@ def ConvTBC(in_channels, out_channels, kernel_size, dropout=0, **kwargs):
 @register_model_architecture('fconv_lm', 'fconv_lm')
 def base_lm_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 128)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(1268, 4)] * 13')
     args.decoder_attention = getattr(args, 'decoder_attention', 'False')
@@ -650,6 +651,7 @@ def fconv_lm_dauphin_gbw(args):
 @register_model_architecture('fconv', 'fconv')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(512, 3)] * 20')
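The '--encoder-layers' and '--decoder-layers' options above are EXPR-valued strings: they are evaluated as Python expressions (via eval) when the fconv model is built, which is why the defaults that now live in base_architecture and base_lm_architecture look like list expressions. A quick illustration of what those default strings expand to:

encoder_layers = eval('[(512, 3)] * 20')     # fconv default from base_architecture
decoder_layers = eval('[(1268, 4)] * 13')    # fconv_lm default from base_lm_architecture
print(len(encoder_layers), encoder_layers[0])    # 20 (512, 3)
print(len(decoder_layers), decoder_layers[0])    # 13 (1268, 4)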
fairseq/models/fconv_self_att.py

@@ -41,7 +41,7 @@ class FConvModelSelfAtt(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
@@ -55,25 +55,25 @@ class FConvModelSelfAtt(FairseqModel):
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--self-attention', default='False', type=str, metavar='EXPR',
+        parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
-        parser.add_argument('--multihead-attention-nheads', default=1, type=int,
+        parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')
-        parser.add_argument('--multihead-self-attention-nheads', default=1, type=int,
+        parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')
-        parser.add_argument('--encoder-attention', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')
-        parser.add_argument('--encoder-attention-nheads', default=1, type=int,
+        parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')
-        parser.add_argument('--project-input', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
-        parser.add_argument('--gated-attention', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
-        parser.add_argument('--downsample', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
-        parser.add_argument('--pretrained-checkpoint', metavar='DIR', default='',
+        parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
-        parser.add_argument('--pretrained', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--pretrained', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')

     @classmethod
@@ -499,22 +499,34 @@ def ConvTBC(in_channels, out_channels, kernel_size, dropout=0, **kwargs):
 @register_model_architecture('fconv_self_att', 'fconv_self_att')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(512, 3)] * 3')
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 512)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(512, 3)] * 8')
     args.decoder_out_embed_dim = getattr(args, 'decoder_out_embed_dim', 256)
     args.decoder_attention = getattr(args, 'decoder_attention', 'True')
+    args.self_attention = getattr(args, 'self_attention', 'False')
+    args.encoder_attention = getattr(args, 'encoder_attention', 'False')
+    args.multihead_attention_nheads = getattr(args, 'multihead_attention_nheads', 1)
+    args.multihead_self_attention_nheads = getattr(args, 'multihead_self_attention_nheads', 1)
+    args.encoder_attention_nheads = getattr(args, 'encoder_attention_nheads', 1)
+    args.project_input = getattr(args, 'project_input', 'False')
+    args.gated_attention = getattr(args, 'gated_attention', 'False')
+    args.downsample = getattr(args, 'downsample', 'False')
+    args.pretrained_checkpoint = getattr(args, 'pretrained_checkpoint', '')
+    args.pretrained = getattr(args, 'pretrained', 'False')


 @register_model_architecture('fconv_self_att', 'fconv_self_att_wp')
 def fconv_self_att_wp(args):
-    base_architecture(args)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(128, 3)] * 2 + [(512,3)] * 1')
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 256)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(512, 4)] * 4 + [(768, 4)] * 2 + [(1024, 4)] * 1')
     args.decoder_out_embed_dim = getattr(args, 'decoder_out_embed_dim', 256)
-    args.multihead_attention_nheads = getattr(args, 'multihead_attention_nheads', 1)
-    args.encoder_attention_nheads = getattr(args, 'encoder_attention_nheads', 1)
+    args.self_attention = getattr(args, 'self_attention', 'True')
     args.multihead_self_attention_nheads = getattr(args, 'multihead_self_attention_nheads', 4)
+    args.project_input = getattr(args, 'project_input', 'True')
+    args.gated_attention = getattr(args, 'gated_attention', 'True')
+    args.downsample = getattr(args, 'downsample', 'True')
+    base_architecture(args)
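One consequence of moving defaults into the architecture functions is that call order now matters: a registered variant such as fconv_self_att_wp has to set its own getattr overrides first and call base_architecture(args) last, otherwise the base defaults would already be attached to args and the variant's getattr calls would never take effect. A minimal sketch of the difference (illustrative function names, not fairseq code):

from argparse import Namespace

def base_architecture(args):
    args.self_attention = getattr(args, 'self_attention', 'False')

def variant_base_first(args):
    # pre-fix ordering: the base default lands first, so 'True' is ignored
    base_architecture(args)
    args.self_attention = getattr(args, 'self_attention', 'True')

def variant_base_last(args):
    # post-fix ordering: the variant's value is set first and survives
    args.self_attention = getattr(args, 'self_attention', 'True')
    base_architecture(args)

a, b = Namespace(), Namespace()
variant_base_first(a)
variant_base_last(b)
print(a.self_attention, b.self_attention)   # False True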
fairseq/models/lstm.py

@@ -25,11 +25,11 @@ class LSTMModel(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--encoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained encoder embedding')
         parser.add_argument('--encoder-hidden-size', type=int, metavar='N',
                             help='encoder hidden size')
@@ -39,7 +39,7 @@ class LSTMModel(FairseqModel):
                             help='make all layers of encoder bidirectional')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
         parser.add_argument('--decoder-hidden-size', type=int, metavar='N',
                             help='decoder hidden size')
@@ -415,6 +415,7 @@ def Linear(in_features, out_features, bias=True, dropout=0):
 @register_model_architecture('lstm', 'lstm')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
     args.encoder_hidden_size = getattr(args, 'encoder_hidden_size', args.encoder_embed_dim)
@@ -434,6 +435,7 @@ def base_architecture(args):
 @register_model_architecture('lstm', 'lstm_wiseman_iwslt_de_en')
 def lstm_wiseman_iwslt_de_en(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_dropout_in = getattr(args, 'encoder_dropout_in', 0)
     args.encoder_dropout_out = getattr(args, 'encoder_dropout_out', 0)
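A detail worth noting in the lstm defaults: some of them are derived from other arguments rather than constants, e.g. encoder_hidden_size falls back to args.encoder_embed_dim, so the getattr lines have to run in dependency order inside base_architecture. A small standalone sketch of that behaviour (not fairseq code):

from argparse import Namespace

def lstm_like_defaults(args):
    # encoder_embed_dim must be resolved before encoder_hidden_size, because
    # the hidden size falls back to whatever the embed dim ends up being
    args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
    args.encoder_hidden_size = getattr(args, 'encoder_hidden_size', args.encoder_embed_dim)
    return args

print(lstm_like_defaults(Namespace()).encoder_hidden_size)                           # 512
print(lstm_like_defaults(Namespace(encoder_embed_dim=256)).encoder_hidden_size)      # 256
print(lstm_like_defaults(Namespace(encoder_hidden_size=1024)).encoder_hidden_size)   # 1024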
fairseq/models/transformer.py

@@ -48,9 +48,9 @@ class TransformerModel(FairseqModel):
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')
-        parser.add_argument('--encoder-normalize-before', default=False, action='store_true',
+        parser.add_argument('--encoder-normalize-before', action='store_true',
                             help='apply layernorm before each encoder block')
-        parser.add_argument('--encoder-learned-pos', default=False, action='store_true',
+        parser.add_argument('--encoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the encoder')
         parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
@@ -62,13 +62,13 @@ class TransformerModel(FairseqModel):
                             help='num decoder layers')
         parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
                             help='num decoder attention heads')
-        parser.add_argument('--decoder-learned-pos', default=False, action='store_true',
+        parser.add_argument('--decoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the decoder')
-        parser.add_argument('--decoder-normalize-before', default=False, action='store_true',
+        parser.add_argument('--decoder-normalize-before', action='store_true',
                             help='apply layernorm before each decoder block')
-        parser.add_argument('--share-decoder-input-output-embed', default=False, action='store_true',
+        parser.add_argument('--share-decoder-input-output-embed', action='store_true',
                             help='share decoder input and output embeddings')
-        parser.add_argument('--share-all-embeddings', default=False, action='store_true',
+        parser.add_argument('--share-all-embeddings', action='store_true',
                             help='share encoder, decoder and output embeddings'
                                  ' (requires shared dictionary and embed dim)')
@@ -422,14 +422,20 @@ def base_architecture(args):
     args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 2048)
     args.encoder_layers = getattr(args, 'encoder_layers', 6)
     args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 8)
+    args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
+    args.encoder_learned_pos = getattr(args, 'encoder_learned_pos', False)
     args.decoder_embed_path = getattr(args, 'decoder_embed_path', None)
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', args.encoder_embed_dim)
     args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', args.encoder_ffn_embed_dim)
     args.decoder_layers = getattr(args, 'decoder_layers', 6)
     args.decoder_attention_heads = getattr(args, 'decoder_attention_heads', 8)
+    args.decoder_normalize_before = getattr(args, 'decoder_normalize_before', False)
+    args.decoder_learned_pos = getattr(args, 'decoder_learned_pos', False)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.)
     args.relu_dropout = getattr(args, 'relu_dropout', 0.)
     args.dropout = getattr(args, 'dropout', 0.1)
+    args.share_decoder_input_output_embed = getattr(args, 'share_decoder_input_output_embed', False)
+    args.share_all_embeddings = getattr(args, 'share_all_embeddings', False)


 @register_model_architecture('transformer', 'transformer_iwslt_de_en')
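In transformer.py every removed default is a default=False on a store_true flag. That looks redundant, since store_true already implies False, but with fairseq's suppress-based model-argument group an explicit default keeps the attribute pinned on args, whereas without one the flag stays absent until base_architecture fills in False, which leaves named transformer architectures free to flip these booleans via getattr. A small sketch of the resulting behaviour, using a parser configured the way fairseq's model-specific group is assumed to be (hypothetical setup, not fairseq code):

import argparse

def transformer_like_defaults(args):
    # mirrors the new base_architecture lines for two of the boolean options
    args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
    args.share_all_embeddings = getattr(args, 'share_all_embeddings', False)
    return args

parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
parser.add_argument('--encoder-normalize-before', action='store_true')
parser.add_argument('--share-all-embeddings', action='store_true')

unset = transformer_like_defaults(parser.parse_args([]))
print(unset.encoder_normalize_before, unset.share_all_embeddings)   # False False

given = transformer_like_defaults(parser.parse_args(['--share-all-embeddings']))
print(given.encoder_normalize_before, given.share_all_embeddings)   # False True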