OpenDAS / Fairseq · Commits · ac5fddfc

Commit ac5fddfc, authored Jul 11, 2018 by Mehdi Drissi, committed by Myle Ott on Jul 11, 2018.

Fix up model defaults (#211)

parent f26b6aff
Showing 4 changed files with 52 additions and 30 deletions (+52 −30):
  fairseq/models/fconv.py            +8  −6
  fairseq/models/fconv_self_att.py   +27 −15
  fairseq/models/lstm.py             +5  −3
  fairseq/models/transformer.py      +12 −6
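The common thread across all four files is that per-flag defaults are removed from the `add_argument(...)` calls and supplied instead inside the `@register_model_architecture` functions via `getattr(args, name, default)`. The standalone sketch below illustrates why an argparse-level default gets in the way of per-architecture defaults. It is not fairseq code: `base_architecture`/`big_architecture` here are hypothetical stand-ins, and the use of `default=argparse.SUPPRESS` is an assumption about how fairseq's model-specific argument group keeps unspecified flags off the namespace, not something shown in this diff.

    import argparse

    # Hypothetical architecture functions in the style of this commit:
    # each fills in any attribute the user did not set on the command line.
    def base_architecture(args):
        args.dropout = getattr(args, 'dropout', 0.1)

    def big_architecture(args):
        args.dropout = getattr(args, 'dropout', 0.3)   # wants a different default
        base_architecture(args)                        # fills in whatever is still unset

    parser = argparse.ArgumentParser()
    # Old style: default=0.1 means args.dropout always exists after parsing,
    # so the getattr fallback in big_architecture can never take effect.
    # New style: no real default here, so an unspecified flag never appears on args
    # (simulated with SUPPRESS; assumed behavior of the surrounding options code).
    parser.add_argument('--dropout', type=float, default=argparse.SUPPRESS)

    args = parser.parse_args([])            # user did not pass --dropout
    big_architecture(args)
    print(args.dropout)                     # 0.3 -- the architecture default wins

    args = parser.parse_args(['--dropout', '0.5'])
    big_architecture(args)
    print(args.dropout)                     # 0.5 -- the user's value wins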
fairseq/models/fconv.py

@@ -31,17 +31,17 @@ class FConvModel(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--encoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained encoder embedding')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
@@ -49,7 +49,7 @@ class FConvModel(FairseqModel):
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--normalization-constant', type=float, default=0.5, metavar='D',
+        parser.add_argument('--normalization-constant', type=float, metavar='D',
                             help='multiplies the result of the residual block by sqrt(value)')
         parser.add_argument('--share-input-output-embed', action='store_true',
                             help='share input and output embeddings (requires'
@@ -104,7 +104,7 @@ class FConvLanguageModel(FairseqLanguageModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
@@ -117,7 +117,7 @@ class FConvLanguageModel(FairseqLanguageModel):
                                  'Must be used with adaptive_loss criterion')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--normalization-constant', type=float, default=0.5, metavar='D',
+        parser.add_argument('--normalization-constant', type=float, metavar='D',
                             help='multiplies the result of the residual block by sqrt(value)')

     @classmethod
@@ -611,6 +611,7 @@ def ConvTBC(in_channels, out_channels, kernel_size, dropout=0, **kwargs):
 @register_model_architecture('fconv_lm', 'fconv_lm')
 def base_lm_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 128)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(1268, 4)] * 13')
     args.decoder_attention = getattr(args, 'decoder_attention', 'False')
@@ -650,6 +651,7 @@ def fconv_lm_dauphin_gbw(args):
 @register_model_architecture('fconv', 'fconv')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(512, 3)] * 20')
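With the defaults gone from `add_args`, the new `args.dropout = getattr(args, 'dropout', 0.1)` lines above become the single place the fconv dropout default lives. A rough sketch of how a derived architecture (in the spirit of the existing `fconv_lm_dauphin_*` presets) can now choose a different value; the variant name and the dropout value are illustrative only, not part of this commit, and the import paths are assumed from the fairseq layout of this era.

    from fairseq.models import register_model_architecture
    from fairseq.models.fconv import base_lm_architecture  # the function patched above

    # Hypothetical variant, shown only to illustrate the override order:
    # set the variant's own fallbacks first, then let the base fill in the rest.
    @register_model_architecture('fconv_lm', 'fconv_lm_high_dropout')
    def fconv_lm_high_dropout(args):
        args.dropout = getattr(args, 'dropout', 0.3)   # wins unless --dropout was given
        base_lm_architecture(args)                     # supplies everything still unset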
fairseq/models/fconv_self_att.py

@@ -41,7 +41,7 @@ class FConvModelSelfAtt(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
@@ -55,25 +55,25 @@ class FConvModelSelfAtt(FairseqModel):
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--self-attention', default='False', type=str, metavar='EXPR',
+        parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
-        parser.add_argument('--multihead-attention-nheads', default=1, type=int,
+        parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')
-        parser.add_argument('--multihead-self-attention-nheads', default=1, type=int,
+        parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')
-        parser.add_argument('--encoder-attention', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')
-        parser.add_argument('--encoder-attention-nheads', default=1, type=int,
+        parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')
-        parser.add_argument('--project-input', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
-        parser.add_argument('--gated-attention', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
-        parser.add_argument('--downsample', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
-        parser.add_argument('--pretrained-checkpoint', metavar='DIR', default='',
+        parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
-        parser.add_argument('--pretrained', type=str, metavar='EXPR', default='False',
+        parser.add_argument('--pretrained', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')

     @classmethod
@@ -499,22 +499,34 @@ def ConvTBC(in_channels, out_channels, kernel_size, dropout=0, **kwargs):
 @register_model_architecture('fconv_self_att', 'fconv_self_att')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(512, 3)] * 3')
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 512)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(512, 3)] * 8')
     args.decoder_out_embed_dim = getattr(args, 'decoder_out_embed_dim', 256)
     args.decoder_attention = getattr(args, 'decoder_attention', 'True')
     args.self_attention = getattr(args, 'self_attention', 'False')
     args.encoder_attention = getattr(args, 'encoder_attention', 'False')
+    args.multihead_attention_nheads = getattr(args, 'multihead_attention_nheads', 1)
+    args.multihead_self_attention_nheads = getattr(args, 'multihead_self_attention_nheads', 1)
     args.encoder_attention_nheads = getattr(args, 'encoder_attention_nheads', 1)
+    args.project_input = getattr(args, 'project_input', 'False')
+    args.gated_attention = getattr(args, 'gated_attention', 'False')
+    args.downsample = getattr(args, 'downsample', 'False')
+    args.pretrained_checkpoint = getattr(args, 'pretrained_checkpoint', '')
+    args.pretrained = getattr(args, 'pretrained', 'False')


 @register_model_architecture('fconv_self_att', 'fconv_self_att_wp')
 def fconv_self_att_wp(args):
-    base_architecture(args)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(128, 3)] * 2 + [(512,3)] * 1')
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 256)
     args.decoder_layers = getattr(args, 'decoder_layers', '[(512, 4)] * 4 + [(768, 4)] * 2 + [(1024, 4)] * 1')
     args.decoder_out_embed_dim = getattr(args, 'decoder_out_embed_dim', 256)
     args.multihead_attention_nheads = getattr(args, 'multihead_attention_nheads', 1)
     args.encoder_attention_nheads = getattr(args, 'encoder_attention_nheads', 1)
+    args.self_attention = getattr(args, 'self_attention', 'True')
+    args.multihead_self_attention_nheads = getattr(args, 'multihead_self_attention_nheads', 4)
+    args.project_input = getattr(args, 'project_input', 'True')
+    args.gated_attention = getattr(args, 'gated_attention', 'True')
+    args.downsample = getattr(args, 'downsample', 'True')
+    base_architecture(args)
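Besides adding the missing getattr fallbacks, this hunk moves the `base_architecture(args)` call in `fconv_self_att_wp` from the first line to the last. The order matters: whichever function runs first sets the attribute, and every later getattr sees it as already present. A minimal standalone sketch of the difference, using generic names rather than fairseq's:

    from argparse import Namespace

    def base(args):
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)

    def wp_old(args):       # pre-commit ordering
        base(args)                                                          # sets 512 first...
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)   # ...so 256 never applies

    def wp_new(args):       # post-commit ordering
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
        base(args)                                                          # only fills in what is still unset

    a, b = Namespace(), Namespace()
    wp_old(a); wp_new(b)
    print(a.encoder_embed_dim, b.encoder_embed_dim)   # 512 256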
fairseq/models/lstm.py

@@ -25,11 +25,11 @@ class LSTMModel(FairseqModel):
     @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
-        parser.add_argument('--dropout', default=0.1, type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--encoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained encoder embedding')
         parser.add_argument('--encoder-hidden-size', type=int, metavar='N',
                             help='encoder hidden size')
@@ -39,7 +39,7 @@ class LSTMModel(FairseqModel):
                             help='make all layers of encoder bidirectional')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-embed-path', default=None, type=str, metavar='STR',
+        parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
         parser.add_argument('--decoder-hidden-size', type=int, metavar='N',
                             help='decoder hidden size')
@@ -415,6 +415,7 @@ def Linear(in_features, out_features, bias=True, dropout=0):
 @register_model_architecture('lstm', 'lstm')
 def base_architecture(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
     args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
     args.encoder_hidden_size = getattr(args, 'encoder_hidden_size', args.encoder_embed_dim)
@@ -434,6 +435,7 @@ def base_architecture(args):
 @register_model_architecture('lstm', 'lstm_wiseman_iwslt_de_en')
 def lstm_wiseman_iwslt_de_en(args):
+    args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_dropout_in = getattr(args, 'encoder_dropout_in', 0)
     args.encoder_dropout_out = getattr(args, 'encoder_dropout_out', 0)
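One detail worth noting in the lstm hunk above: `encoder_hidden_size` falls back to `args.encoder_embed_dim`, i.e. a default that is itself resolved by an earlier getattr line, so the lines in `base_architecture` must stay in dependency order. A small standalone illustration with generic names, not fairseq code:

    from argparse import Namespace

    def lstm_defaults(args):
        # Order matters: encoder_embed_dim must be resolved before it is used
        # as the fallback for encoder_hidden_size.
        args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
        args.encoder_hidden_size = getattr(args, 'encoder_hidden_size', args.encoder_embed_dim)

    args = Namespace(encoder_embed_dim=256)   # as if --encoder-embed-dim 256 had been given
    lstm_defaults(args)
    print(args.encoder_hidden_size)           # 256, inherited from the embedding dim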
fairseq/models/transformer.py

@@ -48,9 +48,9 @@ class TransformerModel(FairseqModel):
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')
-        parser.add_argument('--encoder-normalize-before', default=False, action='store_true',
+        parser.add_argument('--encoder-normalize-before', action='store_true',
                             help='apply layernorm before each encoder block')
-        parser.add_argument('--encoder-learned-pos', default=False, action='store_true',
+        parser.add_argument('--encoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the encoder')
         parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                             help='path to pre-trained decoder embedding')
@@ -62,13 +62,13 @@ class TransformerModel(FairseqModel):
                             help='num decoder layers')
         parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
                             help='num decoder attention heads')
-        parser.add_argument('--decoder-learned-pos', default=False, action='store_true',
+        parser.add_argument('--decoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the decoder')
-        parser.add_argument('--decoder-normalize-before', default=False, action='store_true',
+        parser.add_argument('--decoder-normalize-before', action='store_true',
                             help='apply layernorm before each decoder block')
-        parser.add_argument('--share-decoder-input-output-embed', default=False, action='store_true',
+        parser.add_argument('--share-decoder-input-output-embed', action='store_true',
                             help='share decoder input and output embeddings')
-        parser.add_argument('--share-all-embeddings', default=False, action='store_true',
+        parser.add_argument('--share-all-embeddings', action='store_true',
                             help='share encoder, decoder and output embeddings'
                                  ' (requires shared dictionary and embed dim)')
@@ -422,14 +422,20 @@ def base_architecture(args):
     args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 2048)
     args.encoder_layers = getattr(args, 'encoder_layers', 6)
     args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 8)
+    args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
+    args.encoder_learned_pos = getattr(args, 'encoder_learned_pos', False)
     args.decoder_embed_path = getattr(args, 'decoder_embed_path', None)
     args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', args.encoder_embed_dim)
     args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', args.encoder_ffn_embed_dim)
     args.decoder_layers = getattr(args, 'decoder_layers', 6)
     args.decoder_attention_heads = getattr(args, 'decoder_attention_heads', 8)
+    args.decoder_normalize_before = getattr(args, 'decoder_normalize_before', False)
+    args.decoder_learned_pos = getattr(args, 'decoder_learned_pos', False)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.)
     args.relu_dropout = getattr(args, 'relu_dropout', 0.)
     args.dropout = getattr(args, 'dropout', 0.1)
+    args.share_decoder_input_output_embed = getattr(args, 'share_decoder_input_output_embed', False)
+    args.share_all_embeddings = getattr(args, 'share_all_embeddings', False)


 @register_model_architecture('transformer', 'transformer_iwslt_de_en')
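Removing `default=False` from the store_true flags matters for the same reason as elsewhere: if argparse always materializes the attribute, the getattr fallbacks added to `base_architecture` above (and any overrides in named presets such as `transformer_iwslt_de_en` or a later `transformer_*` architecture) could never take effect. A hedged, standalone sketch of the lookup, again not fairseq code:

    from argparse import Namespace

    def transformer_defaults(args):
        # Mirrors the pattern added to base_architecture above.
        args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
        args.share_all_embeddings = getattr(args, 'share_all_embeddings', False)

    cli = Namespace(share_all_embeddings=True)   # as if --share-all-embeddings had been passed
    transformer_defaults(cli)
    print(cli.encoder_normalize_before, cli.share_all_embeddings)   # False True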