OpenDAS / Fairseq / Commits / ac5fddfc
"docs/vscode:/vscode.git/clone" did not exist on "77e0ea8048a4941b3009c067bb367045aa1ed276"
Fix up model defaults (#211)

Commit ac5fddfc, authored Jul 11, 2018 by Mehdi Drissi, committed by Myle Ott on Jul 11, 2018. Parent: f26b6aff.
Showing 4 changed files with 52 additions and 30 deletions (+52 -30).
Changed files:
  fairseq/models/fconv.py            +8  -6
  fairseq/models/fconv_self_att.py   +27 -15
  fairseq/models/lstm.py             +5  -3
  fairseq/models/transformer.py      +12 -6
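Across all four files the change follows one pattern: the hard-coded default= values are removed from the add_argument() calls, and the registered architecture functions instead fill in any attribute that is still missing via getattr(args, name, default). Below is a minimal, self-contained sketch of that pattern; it is not fairseq's actual code, and the argument group created with argument_default=argparse.SUPPRESS is an assumption about how omitted flags stay off the namespace so that the architecture-level default can take effect:

    # Minimal sketch of the default-handling pattern this commit moves to
    # (illustrative only, not fairseq's actual code).
    import argparse

    parser = argparse.ArgumentParser()
    # Assumption: omitted flags never appear on the namespace, so getattr()
    # in the architecture function below can supply the default.
    group = parser.add_argument_group('model', argument_default=argparse.SUPPRESS)
    group.add_argument('--dropout', type=float, metavar='D', help='dropout probability')
    group.add_argument('--encoder-embed-dim', type=int, metavar='N',
                       help='encoder embedding dimension')

    def base_architecture(args):
        # Architecture-level defaults: applied only when the flag was not given.
        args.dropout = getattr(args, 'dropout', 0.1)
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)

    args = parser.parse_args(['--encoder-embed-dim', '256'])
    base_architecture(args)
    print(args.dropout, args.encoder_embed_dim)  # -> 0.1 256

With the old argparse defaults in place, every flag always ended up on the namespace, so these getattr fallbacks could never take effect; dropping the defaults appears to be the point of the commit, letting each registered architecture own its default values.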
fairseq/models/fconv.py (view file @ ac5fddfc)

@@ -31,17 +31,17 @@ class FConvModel(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--encoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained encoder embedding')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
@@ -49,7 +49,7 @@ class FConvModel(FairseqModel):
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--normalization-constant', type=float, default=0.5, metavar='D',
+        parser.add_argument('--normalization-constant', type=float, metavar='D',
                             help='multiplies the result of the residual block by sqrt(value)')
         parser.add_argument('--share-input-output-embed', action='store_true',
                             help='share input and output embeddings (requires'
@@ -104,7 +104,7 @@ class FConvLanguageModel(FairseqLanguageModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
@@ -117,7 +117,7 @@ class FConvLanguageModel(FairseqLanguageModel):
                             'Must be used with adaptive_loss criterion')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--normalization-constant', type=float, default=0.5, metavar='D',
+        parser.add_argument('--normalization-constant', type=float, metavar='D',
                             help='multiplies the result of the residual block by sqrt(value)')

     @classmethod
@@ -611,6 +611,7 @@ def ConvTBC(in_channels, out_channels, kernel_size, dropout=0, **kwargs):

 @register_model_architecture('fconv_lm', 'fconv_lm')
 def base_lm_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 128)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(1268, 4)] * 13')
     args.decoder_attention = getattr(args, 'decoder_attention', 'False')
@@ -650,6 +651,7 @@ def fconv_lm_dauphin_gbw(args):

 @register_model_architecture('fconv', 'fconv')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(512, 3)] * 20')
fairseq/models/fconv_self_att.py (view file @ ac5fddfc)

@@ -41,7 +41,7 @@ class FConvModelSelfAtt(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
@@ -55,25 +55,25 @@ class FConvModelSelfAtt(FairseqModel):
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--self-attention', default='False', type=str, metavar='EXPR',
+        parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
-        parser.add_argument('--multihead-attention-nheads', default=1, type=int,
+        parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')
-        parser.add_argument('--multihead-self-attention-nheads', default=1, type=int,
+        parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')
-        parser.add_argument('--encoder-attention', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')
-        parser.add_argument('--encoder-attention-nheads', default=1, type=int,
+        parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')
-        parser.add_argument('--project-input', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
-        parser.add_argument('--gated-attention', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
-        parser.add_argument('--downsample', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
-        parser.add_argument('--pretrained-checkpoint', metavar='DIR', default='',
+        parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
-        parser.add_argument('--pretrained', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--pretrained', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')

     @classmethod
@@ -499,22 +499,34 @@ def ConvTBC(in_channels, out_channels, kernel_size, dropout=0, **kwargs):

 @register_model_architecture('fconv_self_att', 'fconv_self_att')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(512, 3)] * 3')
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 512)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(512, 3)] * 8')
     args.decoder_out_embed_dim = getattr(args, 'decoder_out_embed_dim', 256)
     args.decoder_attention = getattr(args, 'decoder_attention', 'True')
+    args.self_attention = getattr(args, 'self_attention', 'False')
+    args.encoder_attention = getattr(args, 'encoder_attention', 'False')
+    args.multihead_attention_nheads = getattr(args, 'multihead_attention_nheads', 1)
+    args.multihead_self_attention_nheads = getattr(args, 'multihead_self_attention_nheads', 1)
+    args.encoder_attention_nheads = getattr(args, 'encoder_attention_nheads', 1)
+    args.project_input = getattr(args, 'project_input', 'False')
+    args.gated_attention = getattr(args, 'gated_attention', 'False')
+    args.downsample = getattr(args, 'downsample', 'False')
+    args.pretrained_checkpoint = getattr(args, 'pretrained_checkpoint', '')
+    args.pretrained = getattr(args, 'pretrained', 'False')

 @register_model_architecture('fconv_self_att', 'fconv_self_att_wp')
 def fconv_self_att_wp(args):
-    base_architecture(args)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(128, 3)] * 2 + [(512,3)] * 1')
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 256)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(512, 4)] * 4 + [(768, 4)] * 2 + [(1024, 4)] * 1')
     args.decoder_out_embed_dim = getattr(args, 'decoder_out_embed_dim', 256)
-    args.multihead_attention_nheads = getattr(args, 'multihead_attention_nheads', 1)
-    args.encoder_attention_nheads = getattr(args, 'encoder_attention_nheads', 1)
+    args.self_attention = getattr(args, 'self_attention', 'True')
     args.multihead_self_attention_nheads = getattr(args, 'multihead_self_attention_nheads', 4)
+    args.project_input = getattr(args, 'project_input', 'True')
+    args.gated_attention = getattr(args, 'gated_attention', 'True')
+    args.downsample = getattr(args, 'downsample', 'True')
+    base_architecture(args)
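One detail in the last hunk above is worth calling out: fconv_self_att_wp now applies its own getattr defaults first and calls base_architecture(args) last, so the base function only fills in what the variant has not already set. A small self-contained sketch of why that ordering matters (stand-in names, not fairseq code):

    # getattr-based defaults never overwrite attributes that already exist, so the
    # variant's values win and the base call only fills in the gaps.
    class Args:  # stand-in for an argparse.Namespace with nothing set
        pass

    def base_architecture(args):
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
        args.downsample = getattr(args, 'downsample', 'False')

    def fconv_self_att_wp(args):
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
        args.downsample = getattr(args, 'downsample', 'True')
        base_architecture(args)  # existing values are kept; missing ones get base defaults

    a = Args()
    fconv_self_att_wp(a)
    print(a.encoder_embed_dim, a.downsample)  # -> 256 True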
fairseq/models/lstm.py (view file @ ac5fddfc)

@@ -25,11 +25,11 @@ class LSTMModel(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--encoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained encoder embedding')
         parser.add_argument('--encoder-hidden-size', type=int, metavar='N',
                             help='encoder hidden size')
@@ -39,7 +39,7 @@ class LSTMModel(FairseqModel):
                             help='make all layers of encoder bidirectional')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
         parser.add_argument('--decoder-hidden-size', type=int, metavar='N',
                             help='decoder hidden size')
@@ -415,6 +415,7 @@ def Linear(in_features, out_features, bias=True, dropout=0):

 @register_model_architecture('lstm', 'lstm')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
     args.encoder_hidden_size = getattr(args, 'encoder_hidden_size', args.encoder_embed_dim)
@@ -434,6 +435,7 @@ def base_architecture(args):

 @register_model_architecture('lstm', 'lstm_wiseman_iwslt_de_en')
 def lstm_wiseman_iwslt_de_en(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_dropout_in = getattr(args, 'encoder_dropout_in', 0)
     args.encoder_dropout_out = getattr(args, 'encoder_dropout_out', 0)
fairseq/models/transformer.py (view file @ ac5fddfc)

@@ -48,9 +48,9 @@ class TransformerModel(FairseqModel):
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')
-        parser.add_argument('--encoder-normalize-before', default=False, action='store_true',
+        parser.add_argument('--encoder-normalize-before', action='store_true',
                             help='apply layernorm before each encoder block')
-        parser.add_argument('--encoder-learned-pos', default=False, action='store_true',
+        parser.add_argument('--encoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the encoder')
         parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
@@ -62,13 +62,13 @@ class TransformerModel(FairseqModel):
                             help='num decoder layers')
         parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
                             help='num decoder attention heads')
-        parser.add_argument('--decoder-learned-pos', default=False, action='store_true',
+        parser.add_argument('--decoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the decoder')
-        parser.add_argument('--decoder-normalize-before', default=False, action='store_true',
+        parser.add_argument('--decoder-normalize-before', action='store_true',
                             help='apply layernorm before each decoder block')
-        parser.add_argument('--share-decoder-input-output-embed', default=False, action='store_true',
+        parser.add_argument('--share-decoder-input-output-embed', action='store_true',
                             help='share decoder input and output embeddings')
-        parser.add_argument('--share-all-embeddings', default=False, action='store_true',
+        parser.add_argument('--share-all-embeddings', action='store_true',
                             help='share encoder, decoder and output embeddings'
                                  ' (requires shared dictionary and embed dim)')
@@ -422,14 +422,20 @@ def base_architecture(args):
     args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 2048)
     args.encoder_layers = getattr(args, 'encoder_layers', 6)
     args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 8)
+    args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
+    args.encoder_learned_pos = getattr(args, 'encoder_learned_pos', False)
     args.decoder_embed_path = getattr(args, 'decoder_embed_path', None)
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', args.encoder_embed_dim)
     args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', args.encoder_ffn_embed_dim)
     args.decoder_layers = getattr(args, 'decoder_layers', 6)
     args.decoder_attention_heads = getattr(args, 'decoder_attention_heads', 8)
+    args.decoder_normalize_before = getattr(args, 'decoder_normalize_before', False)
+    args.decoder_learned_pos = getattr(args, 'decoder_learned_pos', False)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.)
     args.relu_dropout = getattr(args, 'relu_dropout', 0.)
     args.dropout = getattr(args, 'dropout', 0.1)
+    args.share_decoder_input_output_embed = getattr(args, 'share_decoder_input_output_embed', False)
+    args.share_all_embeddings = getattr(args, 'share_all_embeddings', False)

 @register_model_architecture('transformer', 'transformer_iwslt_de_en')
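For completeness, this is roughly how a downstream variant plugs into the updated base_architecture; the snippet assumes a 2018-era fairseq install with the registration API visible in the diff, and the architecture name transformer_tiny_example is made up for illustration:

    # Hedged sketch: registering a hypothetical Transformer variant on top of the
    # updated defaults (requires fairseq; names other than the imports are made up).
    from fairseq.models import register_model_architecture
    from fairseq.models.transformer import base_architecture

    @register_model_architecture('transformer', 'transformer_tiny_example')
    def transformer_tiny_example(args):
        # Override a couple of sizes, then let base_architecture() fill in every
        # remaining default added by this commit (normalize-before flags, etc.).
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 128)
        args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 128)
        base_architecture(args)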