chenpangpang / transformers · Commits

Commit 47f0e3cf, authored Dec 13, 2019 by thomwolf
parent 7296f101

    cleaning up configuration classes

Changes: 43 files in the commit; this page shows 20 changed files with 199 additions and 298 deletions (+199 −298).
Files changed on this page:

examples/summarization/configuration_bertabs.py                         +5  −5
templates/adding_a_new_model/configuration_xxx.py                       +6  −6
templates/adding_a_new_model/tests/modeling_tf_xxx_test.py              +1  −1
templates/adding_a_new_model/tests/modeling_xxx_test.py                 +1  −1
transformers/configuration_albert.py                                    +3  −3
transformers/configuration_bert.py                                      +14 −24
transformers/configuration_ctrl.py                                      +4  −19
transformers/configuration_distilbert.py                                +15 −25
transformers/configuration_gpt2.py                                      +19 −36
transformers/configuration_openai.py                                    +20 −37
transformers/configuration_transfo_xl.py                                +8  −18
transformers/configuration_utils.py                                     +20 −7
transformers/configuration_xlm.py                                       +36 −52
transformers/configuration_xlnet.py                                     +34 −53
transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py  +1  −1
transformers/modeling_gpt2.py                                           +1  −0
transformers/modeling_tf_gpt2.py                                        +1  −0
transformers/modeling_tf_transfo_xl.py                                  +3  −3
transformers/modeling_tf_transfo_xl_utilities.py                        +6  −6
transformers/modeling_tf_xlnet.py                                       +1  −1
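Taken together, the file list shows the shape of the cleanup: every configuration class drops the overloaded `vocab_size_or_config_json_file` first argument in favor of a plain integer `vocab_size`, and JSON handling is centralized in `PretrainedConfig` (configuration_utils.py). A minimal before/after sketch of the calling convention, not part of the commit itself (the JSON path is illustrative):

    from transformers import BertConfig

    # Before this commit, the first argument was overloaded: an int meant a
    # vocabulary size, a str meant a path to a config JSON file.
    #   config = BertConfig(vocab_size_or_config_json_file=30522)
    #   config = BertConfig("bert_config.json")

    # After this commit, the argument is just the vocabulary size...
    config = BertConfig(vocab_size=30522)

    # ...and JSON files go through the dedicated classmethod instead.
    config = BertConfig.from_json_file("bert_config.json")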
examples/summarization/configuration_bertabs.py

@@ -65,7 +65,7 @@ class BertAbsConfig(PretrainedConfig):
     def __init__(
         self,
-        vocab_size_or_config_json_file=30522,
+        vocab_size=30522,
         max_pos=512,
         enc_layers=6,
         enc_hidden_size=512,
@@ -81,14 +81,14 @@ class BertAbsConfig(PretrainedConfig):
     ):
         super(BertAbsConfig, self).__init__(**kwargs)

-        if self._input_is_path_to_json(vocab_size_or_config_json_file):
-            path_to_json = vocab_size_or_config_json_file
+        if self._input_is_path_to_json(vocab_size):
+            path_to_json = vocab_size
             with open(path_to_json, "r", encoding="utf-8") as reader:
                 json_config = json.loads(reader.read())
             for key, value in json_config.items():
                 self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
+        elif isinstance(vocab_size, int):
+            self.vocab_size = vocab_size
         self.max_pos = max_pos
         self.enc_layers = enc_layers
templates/adding_a_new_model/configuration_xxx.py

@@ -39,7 +39,7 @@ class XxxConfig(PretrainedConfig):
     Arguments:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XxxModel`.
+        vocab_size: Vocabulary size of `inputs_ids` in `XxxModel`.
         hidden_size: Size of the encoder layers and the pooler layer.
         num_hidden_layers: Number of hidden layers in the Transformer encoder.
         num_attention_heads: Number of attention heads for each attention layer in
@@ -64,7 +64,7 @@ class XxxConfig(PretrainedConfig):
     pretrained_config_archive_map = XXX_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=50257,
+                 vocab_size=50257,
                  n_positions=1024,
                  n_ctx=1024,
                  n_embd=768,
@@ -84,7 +84,7 @@ class XxxConfig(PretrainedConfig):
                  summary_first_dropout=0.1,
                  **kwargs):
         super(XxxConfig, self).__init__(**kwargs)
-        self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, six.string_types) else -1
+        self.vocab_size = vocab_size if isinstance(vocab_size, six.string_types) else -1
         self.n_ctx = n_ctx
         self.n_positions = n_positions
         self.n_embd = n_embd
@@ -102,12 +102,12 @@ class XxxConfig(PretrainedConfig):
         self.summary_activation = summary_activation
         self.summary_first_dropout = summary_first_dropout
         self.summary_proj_to_labels = summary_proj_to_labels
-        if isinstance(vocab_size_or_config_json_file, six.string_types):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
+        if isinstance(vocab_size, six.string_types):
+            with open(vocab_size, "r", encoding="utf-8") as reader:
                 json_config = json.loads(reader.read())
             for key, value in json_config.items():
                 self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
+        elif not isinstance(vocab_size, int):
             raise ValueError(
                 "First argument must be either a vocabulary size (int)"
                 "or the path to a pretrained model config file (str)"
templates/adding_a_new_model/tests/modeling_tf_xxx_test.py

@@ -111,7 +111,7 @@ class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester):
             choice_labels = ids_tensor([self.batch_size], self.num_choices)

         config = XxxConfig(
-            vocab_size_or_config_json_file=self.vocab_size,
+            vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
templates/adding_a_new_model/tests/modeling_xxx_test.py

@@ -109,7 +109,7 @@ class XxxModelTest(CommonTestCases.CommonModelTester):
             choice_labels = ids_tensor([self.batch_size], self.num_choices)

         config = XxxConfig(
-            vocab_size_or_config_json_file=self.vocab_size,
+            vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
transformers/configuration_albert.py

@@ -37,7 +37,7 @@ class AlbertConfig(PretrainedConfig):
     pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30000,
+                 vocab_size=30000,
                  embedding_size=128,
                  hidden_size=4096,
                  num_hidden_layers=12,
@@ -83,7 +83,7 @@ class AlbertConfig(PretrainedConfig):
         """
         super(AlbertConfig, self).__init__(**kwargs)

-        self.vocab_size = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.embedding_size = embedding_size
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
transformers/configuration_bert.py

@@ -56,7 +56,7 @@ class BertConfig(PretrainedConfig):
     Arguments:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`.
+        vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
         hidden_size: Size of the encoder layers and the pooler layer.
         num_hidden_layers: Number of hidden layers in the Transformer encoder.
         num_attention_heads: Number of attention heads for each attention layer in
@@ -81,7 +81,7 @@ class BertConfig(PretrainedConfig):
     pretrained_config_archive_map = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30522,
+                 vocab_size=30522,
                  hidden_size=768,
                  num_hidden_layers=12,
                  num_attention_heads=12,
@@ -95,14 +95,7 @@ class BertConfig(PretrainedConfig):
                  layer_norm_eps=1e-12,
                  **kwargs):
         super(BertConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.hidden_size = hidden_size
-            self.num_hidden_layers = num_hidden_layers
-            self.num_attention_heads = num_attention_heads
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
@@ -114,6 +107,3 @@ class BertConfig(PretrainedConfig):
-            self.type_vocab_size = type_vocab_size
-            self.initializer_range = initializer_range
-            self.layer_norm_eps = layer_norm_eps
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.type_vocab_size = type_vocab_size
+        self.initializer_range = initializer_range
+        self.layer_norm_eps = layer_norm_eps
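Note that this is a breaking change for call sites that passed a JSON path as the first argument, since `BertConfig` no longer inspects the argument's type. A hedged migration sketch (the file name is hypothetical):

    import json
    from transformers import BertConfig

    # Old-style call sites such as BertConfig("bert_config.json") must now
    # load the file explicitly...
    with open("bert_config.json", "r", encoding="utf-8") as reader:
        config = BertConfig(**json.loads(reader.read()))

    # ...which is exactly what the reworked PretrainedConfig.from_json_file
    # (see transformers/configuration_utils.py below) does for you.
    config = BertConfig.from_json_file("bert_config.json")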
transformers/configuration_ctrl.py

@@ -31,7 +31,7 @@ class CTRLConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `CTRLModel`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
         n_positions: Number of positional embeddings.
         n_ctx: Size of the causal mask (usually same as n_positions).
         dff: Size of the inner dimension of the FFN.
@@ -52,7 +52,7 @@ class CTRLConfig(PretrainedConfig):
     def __init__(
         self,
-        vocab_size_or_config_json_file=246534,
+        vocab_size=246534,
         n_positions=256,
         n_ctx=256,
         n_embd=1280,
@@ -64,8 +64,6 @@ class CTRLConfig(PretrainedConfig):
         attn_pdrop=0.1,
         layer_norm_epsilon=1e-6,
         initializer_range=0.02,
-        num_labels=1,
         summary_type='cls_index',
         summary_use_proj=True,
         summary_activation=None,
@@ -76,7 +74,7 @@ class CTRLConfig(PretrainedConfig):
         """Constructs CTRLConfig.

         Args:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
+            vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
             n_positions: Number of positional embeddings.
             n_ctx: Size of the causal mask (usually same as n_positions).
             dff: Size of the inner dimension of the FFN.
@@ -94,8 +92,7 @@ class CTRLConfig(PretrainedConfig):
                 initializing all weight matrices.
         """
         super(CTRLConfig, self).__init__(**kwargs)
-        self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1
+        self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
         self.n_embd = n_embd
@@ -108,23 +105,11 @@ class CTRLConfig(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range

-        self.num_labels = num_labels
         self.summary_type = summary_type
         self.summary_use_proj = summary_use_proj
         self.summary_activation = summary_activation
         self.summary_first_dropout = summary_first_dropout
         self.summary_proj_to_labels = summary_proj_to_labels

-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")

     @property
     def max_position_embeddings(self):
transformers/configuration_distilbert.py

@@ -37,7 +37,7 @@ class DistilBertConfig(PretrainedConfig):
     pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30522,
+                 vocab_size=30522,
                  max_position_embeddings=512,
                  sinusoidal_pos_embds=False,
                  n_layers=6,
@@ -53,15 +53,7 @@ class DistilBertConfig(PretrainedConfig):
                  seq_classif_dropout=0.2,
                  **kwargs):
         super(DistilBertConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.max_position_embeddings = max_position_embeddings
-            self.sinusoidal_pos_embds = sinusoidal_pos_embds
-            self.n_layers = n_layers
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.sinusoidal_pos_embds = sinusoidal_pos_embds
+        self.n_layers = n_layers
@@ -75,9 +67,7 @@ class DistilBertConfig(PretrainedConfig):
-            self.tie_weights_ = tie_weights_
-            self.qa_dropout = qa_dropout
-            self.seq_classif_dropout = seq_classif_dropout
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.tie_weights_ = tie_weights_
+        self.qa_dropout = qa_dropout
+        self.seq_classif_dropout = seq_classif_dropout

     @property
     def hidden_size(self):
         return self.dim
transformers/configuration_gpt2.py

@@ -36,7 +36,7 @@ class GPT2Config(PretrainedConfig):
     """Configuration class to store the configuration of a `GPT2Model`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
         n_positions: Number of positional embeddings.
         n_ctx: Size of the causal mask (usually same as n_positions).
         n_embd: Dimensionality of the embeddings and hidden states.
@@ -56,7 +56,7 @@ class GPT2Config(PretrainedConfig):
     def __init__(
         self,
-        vocab_size_or_config_json_file=50257,
+        vocab_size=50257,
         n_positions=1024,
         n_ctx=1024,
         n_embd=768,
@@ -67,8 +67,6 @@ class GPT2Config(PretrainedConfig):
         attn_pdrop=0.1,
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
-        num_labels=1,
         summary_type='cls_index',
         summary_use_proj=True,
         summary_activation=None,
@@ -79,7 +77,7 @@ class GPT2Config(PretrainedConfig):
         """Constructs GPT2Config.

         Args:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
+            vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
             n_positions: Number of positional embeddings.
             n_ctx: Size of the causal mask (usually same as n_positions).
             n_embd: Dimensionality of the embeddings and hidden states.
@@ -96,15 +94,7 @@ class GPT2Config(PretrainedConfig):
                 initializing all weight matrices.
         """
         super(GPT2Config, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.n_ctx = n_ctx
-            self.n_positions = n_positions
-            self.n_embd = n_embd
+        self.vocab_size = vocab_size
+        self.n_ctx = n_ctx
+        self.n_positions = n_positions
+        self.n_embd = n_embd
@@ -115,18 +105,11 @@ class GPT2Config(PretrainedConfig):
-            self.attn_pdrop = attn_pdrop
-            self.layer_norm_epsilon = layer_norm_epsilon
-            self.initializer_range = initializer_range
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_first_dropout = summary_first_dropout
-            self.summary_proj_to_labels = summary_proj_to_labels
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")
+        self.attn_pdrop = attn_pdrop
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.initializer_range = initializer_range
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_first_dropout = summary_first_dropout
+        self.summary_proj_to_labels = summary_proj_to_labels

     @property
     def max_position_embeddings(self):
transformers/configuration_openai.py

@@ -35,7 +35,7 @@ class OpenAIGPTConfig(PretrainedConfig):
     Configuration class to store the configuration of a `OpenAIGPTModel`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file.
         n_positions: Number of positional embeddings.
         n_ctx: Size of the causal mask (usually same as n_positions).
         n_embd: Dimensionality of the embeddings and hidden states.
@@ -58,7 +58,7 @@ class OpenAIGPTConfig(PretrainedConfig):
     def __init__(
         self,
-        vocab_size_or_config_json_file=40478,
+        vocab_size=40478,
         n_positions=512,
         n_ctx=512,
         n_embd=768,
@@ -71,8 +71,6 @@ class OpenAIGPTConfig(PretrainedConfig):
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
         predict_special_tokens=True,
-        num_labels=1,
         summary_type='cls_index',
         summary_use_proj=True,
         summary_activation=None,
@@ -83,15 +81,7 @@ class OpenAIGPTConfig(PretrainedConfig):
         """Constructs OpenAIGPTConfig.
         """
         super(OpenAIGPTConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.n_ctx = n_ctx
-            self.n_positions = n_positions
-            self.n_embd = n_embd
+        self.vocab_size = vocab_size
+        self.n_ctx = n_ctx
+        self.n_positions = n_positions
+        self.n_embd = n_embd
@@ -104,18 +94,11 @@ class OpenAIGPTConfig(PretrainedConfig):
-            self.layer_norm_epsilon = layer_norm_epsilon
-            self.initializer_range = initializer_range
-            self.predict_special_tokens = predict_special_tokens
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_first_dropout = summary_first_dropout
-            self.summary_proj_to_labels = summary_proj_to_labels
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.initializer_range = initializer_range
+        self.predict_special_tokens = predict_special_tokens
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_first_dropout = summary_first_dropout
+        self.summary_proj_to_labels = summary_proj_to_labels

     @property
     def max_position_embeddings(self):
transformers/configuration_transfo_xl.py

@@ -34,7 +34,7 @@ class TransfoXLConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `TransfoXLModel`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file.
         cutoffs: cutoffs for the adaptive softmax
         d_model: Dimensionality of the model's hidden states.
         d_embed: Dimensionality of the embeddings
@@ -68,7 +68,7 @@ class TransfoXLConfig(PretrainedConfig):
     pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=267735,
+                 vocab_size=267735,
                  cutoffs=[20000, 40000, 200000],
                  d_model=1024,
                  d_embed=1024,
@@ -100,7 +100,7 @@ class TransfoXLConfig(PretrainedConfig):
         """Constructs TransfoXLConfig.
         """
         super(TransfoXLConfig, self).__init__(**kwargs)
-        self.n_token = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1
+        self.vocab_size = vocab_size
         self.cutoffs = []
         self.cutoffs.extend(cutoffs)
         self.tie_weight = tie_weight
@@ -133,27 +133,17 @@ class TransfoXLConfig(PretrainedConfig):
         self.init_std = init_std
         self.layer_norm_epsilon = layer_norm_epsilon

-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")

     @property
     def max_position_embeddings(self):
         return self.tgt_len + self.ext_len + self.mem_len

     @property
-    def vocab_size(self):
-        return self.n_token
+    def n_token(self):
+        # Backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_token = value
+    @n_token.setter
+    def n_token(self, value):
+        # Backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
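Transformer-XL additionally inverts its attribute naming: `vocab_size` becomes the stored attribute and `n_token` survives only as a property alias, so older code keeps working. The same pattern recurs below for XLM (`n_words`) and XLNet (`n_token`). A small sketch of the compatibility behaviour, not part of the commit (values taken from the defaults above):

    from transformers import TransfoXLConfig

    config = TransfoXLConfig(vocab_size=267735)

    # The old name still reads and writes through the property alias.
    assert config.n_token == 267735
    config.n_token = 1000
    assert config.vocab_size == 1000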
transformers/configuration_utils.py

@@ -49,8 +49,7 @@ class PretrainedConfig(object):
     pretrained_config_archive_map = {}

     def __init__(self, **kwargs):
-        self.finetuning_task = kwargs.pop('finetuning_task', None)
-        self.num_labels = kwargs.pop('num_labels', 2)
+        # Attributes with defaults
         self.output_attentions = kwargs.pop('output_attentions', False)
         self.output_hidden_states = kwargs.pop('output_hidden_states', False)
         self.output_past = kwargs.pop('output_past', True)  # Not used by all models
@@ -59,6 +58,22 @@ class PretrainedConfig(object):
         self.pruned_heads = kwargs.pop('pruned_heads', {})
         self.is_decoder = kwargs.pop('is_decoder', False)

+        # Fine-tuning task arguments
+        self.finetuning_task = kwargs.pop('finetuning_task', None)
+        self.num_labels = kwargs.pop('num_labels', 2)
+        self.id2label = kwargs.pop('id2label', {i: 'LABEL_{}'.format(i) for i in range(self.num_labels)})
+        self.id2label = dict((int(key), value) for key, value in self.id2label.items())
+        self.label2id = kwargs.pop('label2id', dict(zip(self.id2label.values(), self.id2label.keys())))
+        self.label2id = dict((key, int(value)) for key, value in self.label2id.items())
+
+        # Additional attributes without default values
+        for key, value in kwargs.items():
+            try:
+                setattr(self, key, value)
+            except AttributeError as err:
+                logger.error("Can't set {} with value {} for {}".format(key, value, self))
+                raise err

     def save_pretrained(self, save_directory):
         """ Save a configuration object to the directory `save_directory`, so that it
            can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method.
@@ -183,17 +198,15 @@ class PretrainedConfig(object):
     @classmethod
     def from_dict(cls, json_object):
         """Constructs a `Config` from a Python dictionary of parameters."""
-        config = cls(vocab_size_or_config_json_file=-1)
-        for key, value in json_object.items():
-            setattr(config, key, value)
-        return config
+        return cls(**json_object)

     @classmethod
     def from_json_file(cls, json_file):
         """Constructs a `Config` from a json file of parameters."""
         with open(json_file, "r", encoding='utf-8') as reader:
             text = reader.read()
-        return cls.from_dict(json.loads(text))
+        dict_obj = json.loads(text)
+        return cls(**dict_obj)

     def __eq__(self, other):
         return self.__dict__ == other.__dict__
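With `from_dict` reduced to `cls(**json_object)`, every serialized key now flows through `__init__`: keys named in the subclass signature are bound there, and the rest are popped or set as attributes by `PretrainedConfig.__init__`. A stripped-down imitation of that flow, for illustration only (not the library class itself):

    class MiniConfig(object):
        # Stripped-down imitation of the new PretrainedConfig kwargs flow.
        def __init__(self, **kwargs):
            # Attributes with defaults
            self.num_labels = kwargs.pop('num_labels', 2)
            self.id2label = kwargs.pop(
                'id2label', {i: 'LABEL_{}'.format(i) for i in range(self.num_labels)})
            # Additional attributes without default values
            for key, value in kwargs.items():
                setattr(self, key, value)

        @classmethod
        def from_dict(cls, json_object):
            return cls(**json_object)   # the whole method after this commit

    config = MiniConfig.from_dict({'num_labels': 3, 'my_extra_field': True})
    assert config.num_labels == 3 and config.my_extra_field is True
    assert config.id2label == {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}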
transformers/configuration_xlm.py

@@ -42,7 +42,7 @@ class XLMConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `XLMModel`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XLMModel`.
+        vocab_size: Vocabulary size of `inputs_ids` in `XLMModel`.
         d_model: Size of the encoder layers and the pooler layer.
         n_layer: Number of hidden layers in the Transformer encoder.
         n_head: Number of attention heads for each attention layer in
@@ -81,7 +81,7 @@ class XLMConfig(PretrainedConfig):
     pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30145,
+                 vocab_size=30145,
                  emb_dim=2048,
                  n_layers=12,
                  n_heads=16,
@@ -103,9 +103,6 @@ class XLMConfig(PretrainedConfig):
                  unk_index=3,
                  mask_index=5,
                  is_encoder=True,
-                 finetuning_task=None,
-                 num_labels=2,
                  summary_type='first',
                  summary_use_proj=True,
                  summary_activation=None,
@@ -117,15 +114,7 @@ class XLMConfig(PretrainedConfig):
         """Constructs XLMConfig.
         """
         super(XLMConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_words = vocab_size_or_config_json_file
-            self.emb_dim = emb_dim
-            self.n_layers = n_layers
-            self.n_heads = n_heads
+        self.vocab_size = vocab_size
+        self.emb_dim = emb_dim
+        self.n_layers = n_layers
+        self.n_heads = n_heads
@@ -147,8 +136,6 @@ class XLMConfig(PretrainedConfig):
-            self.max_position_embeddings = max_position_embeddings
-            self.embed_init_std = embed_init_std
-            self.init_std = init_std
-            self.finetuning_task = finetuning_task
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
+        self.max_position_embeddings = max_position_embeddings
+        self.embed_init_std = embed_init_std
+        self.init_std = init_std
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
@@ -156,17 +143,14 @@ class XLMConfig(PretrainedConfig):
-            self.summary_first_dropout = summary_first_dropout
-            self.start_n_top = start_n_top
-            self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.summary_first_dropout = summary_first_dropout
+        self.start_n_top = start_n_top
+        self.end_n_top = end_n_top

     @property
-    def vocab_size(self):
-        return self.n_words
+    def n_words(self):
+        # For backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_words = value
+    @n_words.setter
+    def n_words(self, value):
+        # For backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
transformers/configuration_xlnet.py

@@ -35,7 +35,7 @@ class XLNetConfig(PretrainedConfig):
     """Configuration class to store the configuration of a ``XLNetModel``.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
+        vocab_size: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
         d_model: Size of the encoder layers and the pooler layer.
         n_layer: Number of hidden layers in the Transformer encoder.
         n_head: Number of attention heads for each attention layer in
@@ -72,28 +72,22 @@ class XLNetConfig(PretrainedConfig):
     pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=32000,
+                 vocab_size=32000,
                  d_model=1024,
                  n_layer=24,
                  n_head=16,
                  d_inner=4096,
-                 max_position_embeddings=512,
                  ff_activation="gelu",
                  untie_r=True,
                  attn_type="bi",
                  initializer_range=0.02,
                  layer_norm_eps=1e-12,
                  dropout=0.1,
                  mem_len=None,
                  reuse_len=None,
                  bi_data=False,
                  clamp_len=-1,
                  same_length=False,
-                 finetuning_task=None,
-                 num_labels=2,
                  summary_type='last',
                  summary_use_proj=True,
                  summary_activation='tanh',
@@ -104,15 +98,7 @@ class XLNetConfig(PretrainedConfig):
         """Constructs XLNetConfig.
         """
         super(XLNetConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                setattr(config, key, value)
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_token = vocab_size_or_config_json_file
-            self.d_model = d_model
-            self.n_layer = n_layer
-            self.n_head = n_head
+        self.vocab_size = vocab_size
+        self.d_model = d_model
+        self.n_layer = n_layer
+        self.n_head = n_head
@@ -133,29 +119,24 @@ class XLNetConfig(PretrainedConfig):
-            self.clamp_len = clamp_len
-            self.same_length = same_length
-            self.finetuning_task = finetuning_task
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_last_dropout = summary_last_dropout
-            self.start_n_top = start_n_top
-            self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.clamp_len = clamp_len
+        self.same_length = same_length
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_last_dropout = summary_last_dropout
+        self.start_n_top = start_n_top
+        self.end_n_top = end_n_top

     @property
     def max_position_embeddings(self):
         return -1

     @property
-    def vocab_size(self):
-        return self.n_token
+    def n_token(self):
+        # Backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_token = value
+    @n_token.setter
+    def n_token(self, value):
+        # Backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
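Incidentally, the deleted XLNet branch contained a latent bug: it called `setattr(config, key, value)` inside `__init__`, where no name `config` is bound, so passing a JSON path to the old `XLNetConfig` would have raised a `NameError` rather than loading the file. A minimal reproduction of that failure mode (hypothetical code, mirroring the deleted lines):

    # `config` is never bound inside the function, just as it was not bound
    # inside the old XLNetConfig.__init__.
    def old_style_init(json_config):
        for key, value in json_config.items():
            setattr(config, key, value)

    try:
        old_style_init({'d_model': 1024})
    except NameError as err:
        print(err)  # name 'config' is not defined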
transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py

@@ -46,7 +46,7 @@ def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_
     roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
     roberta.eval()  # disable dropout
     config = BertConfig(
-        vocab_size_or_config_json_file=50265,
+        vocab_size=50265,
         hidden_size=roberta.args.encoder_embed_dim,
         num_hidden_layers=roberta.args.encoder_layers,
         num_attention_heads=roberta.args.encoder_attention_heads,
transformers/modeling_gpt2.py

@@ -634,6 +634,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     """
     def __init__(self, config):
         super(GPT2DoubleHeadsModel, self).__init__(config)
+        config.num_labels = 1
         self.transformer = GPT2Model(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         self.multiple_choice_head = SequenceSummary(config)
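`num_labels=1` was previously a `GPT2Config` constructor default; now that `PretrainedConfig` defaults it to 2, the double-heads model pins it to 1 itself before building `SequenceSummary`, so the multiple-choice head projects each choice summary to a single score. A shape sketch under that assumption (dimensions illustrative, not the library's code):

    import torch
    import torch.nn as nn

    batch_size, num_choices, hidden = 2, 4, 768
    summaries = torch.randn(batch_size, num_choices, hidden)  # one summary per choice

    # With num_labels = 1 the summary projection is Linear(hidden, 1); squeezing
    # the last axis leaves one logit per choice: shape (batch_size, num_choices).
    mc_logits = nn.Linear(hidden, 1)(summaries).squeeze(-1)
    print(mc_logits.shape)  # torch.Size([2, 4])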
transformers/modeling_tf_gpt2.py

@@ -574,6 +574,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
     """
     def __init__(self, config, *inputs, **kwargs):
         super(TFGPT2DoubleHeadsModel, self).__init__(config, *inputs, **kwargs)
+        config.num_labels = 1
         self.transformer = TFGPT2MainLayer(config, name='transformer')
         self.multiple_choice_head = TFSequenceSummary(config, initializer_range=config.initializer_range, name='multiple_choice_head')
transformers/modeling_tf_transfo_xl.py

@@ -353,7 +353,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

-        self.n_token = config.n_token
+        self.n_token = config.vocab_size
         self.d_embed = config.d_embed
         self.d_model = config.d_model
@@ -361,7 +361,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
         self.d_head = config.d_head
         self.untie_r = config.untie_r

-        self.word_emb = TFAdaptiveEmbedding(config.n_token, config.d_embed, config.d_model, config.cutoffs,
+        self.word_emb = TFAdaptiveEmbedding(config.vocab_size, config.d_embed, config.d_model, config.cutoffs,
                                             div_val=config.div_val, init_std=config.init_std, name='word_emb')

         self.drop = tf.keras.layers.Dropout(config.dropout)
@@ -729,7 +729,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
             raise NotImplementedError
         # use adaptive softmax (including standard softmax)
         else:
-            self.crit = TFAdaptiveSoftmaxMask(config.n_token, config.d_embed, config.d_model,
+            self.crit = TFAdaptiveSoftmaxMask(config.vocab_size, config.d_embed, config.d_model,
                                               config.cutoffs, div_val=config.div_val, name='crit')

     def reset_length(self, tgt_len, ext_len, mem_len):
transformers/modeling_tf_transfo_xl_utilities.py

@@ -25,15 +25,15 @@ import tensorflow as tf
 from .modeling_tf_utils import shape_list


 class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
-    def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1,
+    def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1,
                  keep_order=False, **kwargs):
         super(TFAdaptiveSoftmaxMask, self).__init__(**kwargs)
-        self.n_token = n_token
+        self.vocab_size = vocab_size
         self.d_embed = d_embed
         self.d_proj = d_proj
-        self.cutoffs = cutoffs + [n_token]
+        self.cutoffs = cutoffs + [vocab_size]
         self.cutoff_ends = [0] + self.cutoffs
         self.div_val = div_val
@@ -66,11 +66,11 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
                 self.out_projs.append(weight)
             else:
                 self.out_projs.append(None)
-            weight = self.add_weight(shape=(self.n_token, self.d_embed,),
+            weight = self.add_weight(shape=(self.vocab_size, self.d_embed,),
                                      initializer='zeros',
                                      trainable=True,
                                      name='out_layers_._{}_._weight'.format(i))
-            bias = self.add_weight(shape=(self.n_token,),
+            bias = self.add_weight(shape=(self.vocab_size,),
                                    initializer='zeros',
                                    trainable=True,
                                    name='out_layers_._{}_._bias'.format(i))
@@ -114,7 +114,7 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
         hidden, target = inputs
         head_logprob = 0
         if self.n_clusters == 0:
-            softmax_b = tf.get_variable('bias', [n_token], initializer=tf.zeros_initializer())
+            softmax_b = tf.get_variable('bias', [self.config.vocab_size], initializer=tf.zeros_initializer())
             output = self._logit(hidden, self.out_layers[0][0], self.out_layers[0][1], self.out_projs[0])
             if target is not None:
                 loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output)
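The rename here is mechanical, but the `cutoffs + [vocab_size]` line it touches is the heart of the adaptive softmax: the cutoffs partition the vocabulary into frequency clusters, with the full vocabulary size appended as the final boundary. A plain-Python sketch using the Transformer-XL defaults from configuration_transfo_xl.py above:

    vocab_size = 267735
    cutoffs = [20000, 40000, 200000]

    cutoffs = cutoffs + [vocab_size]   # self.cutoffs      -> [20000, 40000, 200000, 267735]
    cutoff_ends = [0] + cutoffs        # self.cutoff_ends  -> [0, 20000, 40000, 200000, 267735]

    # Cluster i covers token ids in [cutoff_ends[i], cutoff_ends[i + 1]).
    for i in range(len(cutoffs)):
        print(i, cutoff_ends[i], cutoff_ends[i + 1])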
transformers/modeling_tf_xlnet.py

@@ -366,7 +366,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
         self.use_bfloat16 = config.use_bfloat16
         self.initializer_range = config.initializer_range

-        self.word_embedding = TFSharedEmbeddings(config.n_token, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
+        self.word_embedding = TFSharedEmbeddings(config.vocab_size, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
         self.layer = [TFXLNetLayer(config, name='layer_._{}'.format(i)) for i in range(config.n_layer)]
         self.dropout = tf.keras.layers.Dropout(config.dropout)