Commit 47f0e3cf authored Dec 13, 2019 by thomwolf

cleaning up configuration classes

Parent: 7296f101
Changes: 43 changed files in total; this page shows the first 20 changed files, with 199 additions and 298 deletions.
examples/summarization/configuration_bertabs.py  +5 -5
templates/adding_a_new_model/configuration_xxx.py  +6 -6
templates/adding_a_new_model/tests/modeling_tf_xxx_test.py  +1 -1
templates/adding_a_new_model/tests/modeling_xxx_test.py  +1 -1
transformers/configuration_albert.py  +3 -3
transformers/configuration_bert.py  +14 -24
transformers/configuration_ctrl.py  +4 -19
transformers/configuration_distilbert.py  +15 -25
transformers/configuration_gpt2.py  +19 -36
transformers/configuration_openai.py  +20 -37
transformers/configuration_transfo_xl.py  +8 -18
transformers/configuration_utils.py  +20 -7
transformers/configuration_xlm.py  +36 -52
transformers/configuration_xlnet.py  +34 -53
transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py  +1 -1
transformers/modeling_gpt2.py  +1 -0
transformers/modeling_tf_gpt2.py  +1 -0
transformers/modeling_tf_transfo_xl.py  +3 -3
transformers/modeling_tf_transfo_xl_utilities.py  +6 -6
transformers/modeling_tf_xlnet.py  +1 -1
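The pattern repeated across the diffs below is the removal of the dual-purpose `vocab_size_or_config_json_file` argument: every configuration class now takes a plain integer `vocab_size`, and loading parameters from a JSON file is handled by the `from_dict`/`from_json_file` classmethods on `PretrainedConfig` rather than by the constructor. A minimal before/after sketch, assuming a transformers checkout that includes this commit (the defaults come from the diff; the JSON path is illustrative):

from transformers import BertConfig

# New style: the first argument is a plain vocabulary size.
config = BertConfig(vocab_size=30522, hidden_size=768, num_hidden_layers=12)

# Loading from a JSON file now goes through the classmethods on
# PretrainedConfig (the path shown here is illustrative).
config_from_file = BertConfig.from_json_file("bert_config.json")

# Old style removed by this commit, shown for comparison only:
#   config = BertConfig(vocab_size_or_config_json_file=30522)
#   config = BertConfig(vocab_size_or_config_json_file="bert_config.json")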
examples/summarization/configuration_bertabs.py

@@ -65,7 +65,7 @@ class BertAbsConfig(PretrainedConfig):
     def __init__(
         self,
-        vocab_size_or_config_json_file=30522,
+        vocab_size=30522,
         max_pos=512,
         enc_layers=6,
         enc_hidden_size=512,
@@ -81,14 +81,14 @@ class BertAbsConfig(PretrainedConfig):
     ):
         super(BertAbsConfig, self).__init__(**kwargs)

-        if self._input_is_path_to_json(vocab_size_or_config_json_file):
-            path_to_json = vocab_size_or_config_json_file
+        if self._input_is_path_to_json(vocab_size):
+            path_to_json = vocab_size
             with open(path_to_json, "r", encoding="utf-8") as reader:
                 json_config = json.loads(reader.read())
             for key, value in json_config.items():
                 self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
+        elif isinstance(vocab_size, int):
+            self.vocab_size = vocab_size
         self.max_pos = max_pos
         self.enc_layers = enc_layers
templates/adding_a_new_model/configuration_xxx.py

@@ -39,7 +39,7 @@ class XxxConfig(PretrainedConfig):
         Arguments:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XxxModel`.
+            vocab_size: Vocabulary size of `inputs_ids` in `XxxModel`.
             hidden_size: Size of the encoder layers and the pooler layer.
             num_hidden_layers: Number of hidden layers in the Transformer encoder.
             num_attention_heads: Number of attention heads for each attention layer in
@@ -64,7 +64,7 @@ class XxxConfig(PretrainedConfig):
     pretrained_config_archive_map = XXX_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=50257,
+                 vocab_size=50257,
                  n_positions=1024,
                  n_ctx=1024,
                  n_embd=768,
@@ -84,7 +84,7 @@ class XxxConfig(PretrainedConfig):
                  summary_first_dropout=0.1,
                  **kwargs):
         super(XxxConfig, self).__init__(**kwargs)
-        self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, six.string_types) else -1
+        self.vocab_size = vocab_size if isinstance(vocab_size, six.string_types) else -1
         self.n_ctx = n_ctx
         self.n_positions = n_positions
         self.n_embd = n_embd
@@ -102,12 +102,12 @@ class XxxConfig(PretrainedConfig):
         self.summary_activation = summary_activation
         self.summary_first_dropout = summary_first_dropout
         self.summary_proj_to_labels = summary_proj_to_labels
-        if isinstance(vocab_size_or_config_json_file, six.string_types):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
+        if isinstance(vocab_size, six.string_types):
+            with open(vocab_size, "r", encoding="utf-8") as reader:
                 json_config = json.loads(reader.read())
             for key, value in json_config.items():
                 self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
+        elif not isinstance(vocab_size, int):
             raise ValueError("First argument must be either a vocabulary size (int)"
                              "or the path to a pretrained model config file (str)")
templates/adding_a_new_model/tests/modeling_tf_xxx_test.py

@@ -111,7 +111,7 @@ class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester):
             choice_labels = ids_tensor([self.batch_size], self.num_choices)

             config = XxxConfig(
-                vocab_size_or_config_json_file=self.vocab_size,
+                vocab_size=self.vocab_size,
                 hidden_size=self.hidden_size,
                 num_hidden_layers=self.num_hidden_layers,
                 num_attention_heads=self.num_attention_heads,
templates/adding_a_new_model/tests/modeling_xxx_test.py

@@ -109,7 +109,7 @@ class XxxModelTest(CommonTestCases.CommonModelTester):
             choice_labels = ids_tensor([self.batch_size], self.num_choices)

             config = XxxConfig(
-                vocab_size_or_config_json_file=self.vocab_size,
+                vocab_size=self.vocab_size,
                 hidden_size=self.hidden_size,
                 num_hidden_layers=self.num_hidden_layers,
                 num_attention_heads=self.num_attention_heads,
transformers/configuration_albert.py

@@ -37,7 +37,7 @@ class AlbertConfig(PretrainedConfig):
     pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30000,
+                 vocab_size=30000,
                  embedding_size=128,
                  hidden_size=4096,
                  num_hidden_layers=12,
@@ -83,7 +83,7 @@ class AlbertConfig(PretrainedConfig):
         """
         super(AlbertConfig, self).__init__(**kwargs)

-        self.vocab_size = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.embedding_size = embedding_size
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
transformers/configuration_bert.py

@@ -56,7 +56,7 @@ class BertConfig(PretrainedConfig):
         Arguments:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`.
+            vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
             hidden_size: Size of the encoder layers and the pooler layer.
             num_hidden_layers: Number of hidden layers in the Transformer encoder.
             num_attention_heads: Number of attention heads for each attention layer in
@@ -81,7 +81,7 @@ class BertConfig(PretrainedConfig):
     pretrained_config_archive_map = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30522,
+                 vocab_size=30522,
                  hidden_size=768,
                  num_hidden_layers=12,
                  num_attention_heads=12,
@@ -95,14 +95,7 @@ class BertConfig(PretrainedConfig):
                  layer_norm_eps=1e-12,
                  **kwargs):
         super(BertConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
@@ -114,6 +107,3 @@ class BertConfig(PretrainedConfig):
         self.type_vocab_size = type_vocab_size
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
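With the string/int branching gone from `BertConfig.__init__`, the constructor is purely keyword-driven, and rebuilding a config from serialized parameters is left to `PretrainedConfig.from_dict` (reworked in the configuration_utils.py diff further down). A small round-trip sketch, assuming the post-commit API:

from transformers import BertConfig

config = BertConfig(vocab_size=30522, num_attention_heads=12)

# to_dict()/from_dict() round-trip; from_dict is now simply cls(**json_object).
as_dict = config.to_dict()
restored = BertConfig.from_dict(as_dict)

assert restored.vocab_size == 30522
assert restored.num_attention_heads == 12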
transformers/configuration_ctrl.py

@@ -31,7 +31,7 @@ class CTRLConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `CTRLModel`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
         n_positions: Number of positional embeddings.
         n_ctx: Size of the causal mask (usually same as n_positions).
         dff: Size of the inner dimension of the FFN.
@@ -52,7 +52,7 @@ class CTRLConfig(PretrainedConfig):
     def __init__(self,
-                 vocab_size_or_config_json_file=246534,
+                 vocab_size=246534,
                  n_positions=256,
                  n_ctx=256,
                  n_embd=1280,
@@ -64,8 +64,6 @@ class CTRLConfig(PretrainedConfig):
                  attn_pdrop=0.1,
                  layer_norm_epsilon=1e-6,
                  initializer_range=0.02,
-                 num_labels=1,
                  summary_type='cls_index',
                  summary_use_proj=True,
                  summary_activation=None,
@@ -76,7 +74,7 @@ class CTRLConfig(PretrainedConfig):
         """Constructs CTRLConfig.

         Args:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
+            vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
             n_positions: Number of positional embeddings.
             n_ctx: Size of the causal mask (usually same as n_positions).
             dff: Size of the inner dimension of the FFN.
@@ -94,8 +92,7 @@ class CTRLConfig(PretrainedConfig):
                 initializing all weight matrices.
         """
         super(CTRLConfig, self).__init__(**kwargs)
-        self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1
+        self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
         self.n_embd = n_embd
@@ -108,23 +105,11 @@ class CTRLConfig(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
-        self.num_labels = num_labels
         self.summary_type = summary_type
         self.summary_use_proj = summary_use_proj
         self.summary_activation = summary_activation
         self.summary_first_dropout = summary_first_dropout
         self.summary_proj_to_labels = summary_proj_to_labels
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")

     @property
     def max_position_embeddings(self):
transformers/configuration_distilbert.py

@@ -37,7 +37,7 @@ class DistilBertConfig(PretrainedConfig):
     pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30522,
+                 vocab_size=30522,
                  max_position_embeddings=512,
                  sinusoidal_pos_embds=False,
                  n_layers=6,
@@ -53,15 +53,7 @@ class DistilBertConfig(PretrainedConfig):
                  seq_classif_dropout=0.2,
                  **kwargs):
         super(DistilBertConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.max_position_embeddings = max_position_embeddings
         self.sinusoidal_pos_embds = sinusoidal_pos_embds
         self.n_layers = n_layers
@@ -75,9 +67,7 @@ class DistilBertConfig(PretrainedConfig):
         self.tie_weights_ = tie_weights_
         self.qa_dropout = qa_dropout
         self.seq_classif_dropout = seq_classif_dropout
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")

     @property
     def hidden_size(self):
         return self.dim
transformers/configuration_gpt2.py

@@ -36,7 +36,7 @@ class GPT2Config(PretrainedConfig):
     """Configuration class to store the configuration of a `GPT2Model`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
         n_positions: Number of positional embeddings.
         n_ctx: Size of the causal mask (usually same as n_positions).
         n_embd: Dimensionality of the embeddings and hidden states.
@@ -56,7 +56,7 @@ class GPT2Config(PretrainedConfig):
     def __init__(self,
-                 vocab_size_or_config_json_file=50257,
+                 vocab_size=50257,
                  n_positions=1024,
                  n_ctx=1024,
                  n_embd=768,
@@ -67,8 +67,6 @@ class GPT2Config(PretrainedConfig):
                  attn_pdrop=0.1,
                  layer_norm_epsilon=1e-5,
                  initializer_range=0.02,
-                 num_labels=1,
                  summary_type='cls_index',
                  summary_use_proj=True,
                  summary_activation=None,
@@ -79,7 +77,7 @@ class GPT2Config(PretrainedConfig):
         """Constructs GPT2Config.

         Args:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
+            vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
             n_positions: Number of positional embeddings.
             n_ctx: Size of the causal mask (usually same as n_positions).
             n_embd: Dimensionality of the embeddings and hidden states.
@@ -96,15 +94,7 @@ class GPT2Config(PretrainedConfig):
                 initializing all weight matrices.
         """
         super(GPT2Config, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
         self.n_embd = n_embd
@@ -115,18 +105,11 @@ class GPT2Config(PretrainedConfig):
         self.attn_pdrop = attn_pdrop
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
-        self.num_labels = num_labels
         self.summary_type = summary_type
         self.summary_use_proj = summary_use_proj
         self.summary_activation = summary_activation
         self.summary_first_dropout = summary_first_dropout
         self.summary_proj_to_labels = summary_proj_to_labels
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")

     @property
     def max_position_embeddings(self):
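Note that `num_labels=1` disappears from the GPT-2 signature: `num_labels` is now popped generically in `PretrainedConfig.__init__` (see the configuration_utils.py diff below), and `GPT2DoubleHeadsModel` sets it on its config explicitly (see the modeling_gpt2.py diff further down). A hedged sketch of the resulting behaviour:

from transformers import GPT2Config

# num_labels is no longer an explicit GPT2Config parameter, but it still
# travels through **kwargs and is popped by PretrainedConfig.__init__.
config = GPT2Config(vocab_size=50257, num_labels=1)
assert config.num_labels == 1

# Without the keyword, the base-class default of 2 applies.
assert GPT2Config().num_labels == 2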
transformers/configuration_openai.py

@@ -35,7 +35,7 @@ class OpenAIGPTConfig(PretrainedConfig):
    Configuration class to store the configuration of a `OpenAIGPTModel`.

    Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file.
        n_positions: Number of positional embeddings.
        n_ctx: Size of the causal mask (usually same as n_positions).
        n_embd: Dimensionality of the embeddings and hidden states.
@@ -58,7 +58,7 @@ class OpenAIGPTConfig(PretrainedConfig):
     def __init__(self,
-                 vocab_size_or_config_json_file=40478,
+                 vocab_size=40478,
                  n_positions=512,
                  n_ctx=512,
                  n_embd=768,
@@ -71,8 +71,6 @@ class OpenAIGPTConfig(PretrainedConfig):
                  layer_norm_epsilon=1e-5,
                  initializer_range=0.02,
                  predict_special_tokens=True,
-                 num_labels=1,
                  summary_type='cls_index',
                  summary_use_proj=True,
                  summary_activation=None,
@@ -83,15 +81,7 @@ class OpenAIGPTConfig(PretrainedConfig):
        """Constructs OpenAIGPTConfig.
        """
        super(OpenAIGPTConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
         self.n_embd = n_embd
@@ -104,18 +94,11 @@ class OpenAIGPTConfig(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
         self.predict_special_tokens = predict_special_tokens
-        self.num_labels = num_labels
         self.summary_type = summary_type
         self.summary_use_proj = summary_use_proj
         self.summary_activation = summary_activation
         self.summary_first_dropout = summary_first_dropout
         self.summary_proj_to_labels = summary_proj_to_labels
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")

     @property
     def max_position_embeddings(self):
transformers/configuration_transfo_xl.py

@@ -34,7 +34,7 @@ class TransfoXLConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `TransfoXLModel`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file.
         cutoffs: cutoffs for the adaptive softmax
         d_model: Dimensionality of the model's hidden states.
         d_embed: Dimensionality of the embeddings
@@ -68,7 +68,7 @@ class TransfoXLConfig(PretrainedConfig):
     pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=267735,
+                 vocab_size=267735,
                  cutoffs=[20000, 40000, 200000],
                  d_model=1024,
                  d_embed=1024,
@@ -100,7 +100,7 @@ class TransfoXLConfig(PretrainedConfig):
        """Constructs TransfoXLConfig.
        """
        super(TransfoXLConfig, self).__init__(**kwargs)
-        self.n_token = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1
+        self.vocab_size = vocab_size
        self.cutoffs = []
        self.cutoffs.extend(cutoffs)
        self.tie_weight = tie_weight
@@ -133,27 +133,17 @@ class TransfoXLConfig(PretrainedConfig):
         self.init_std = init_std
         self.layer_norm_epsilon = layer_norm_epsilon
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")

     @property
     def max_position_embeddings(self):
         return self.tgt_len + self.ext_len + self.mem_len

     @property
-    def vocab_size(self):
-        return self.n_token
+    def n_token(self):  # Backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_token = value
+    @n_token.setter
+    def n_token(self, value):  # Backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
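The property block above is the backward-compatibility half of the rename: the canonical attribute is now `vocab_size`, and `n_token` survives only as a property whose getter and setter forward to it. A standalone sketch of the same aliasing pattern (a simplified stand-in, not the transformers class itself):

class ConfigWithAlias(object):
    """Minimal illustration of the n_token -> vocab_size alias used above."""

    def __init__(self, vocab_size=267735):
        self.vocab_size = vocab_size  # canonical attribute

    @property
    def n_token(self):  # Backward compatibility
        return self.vocab_size

    @n_token.setter
    def n_token(self, value):  # Backward compatibility
        self.vocab_size = value


config = ConfigWithAlias()
config.n_token = 1000             # writing through the old name...
assert config.vocab_size == 1000  # ...updates the new attribute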
transformers/configuration_utils.py

@@ -49,8 +49,7 @@ class PretrainedConfig(object):
     pretrained_config_archive_map = {}

     def __init__(self, **kwargs):
-        self.finetuning_task = kwargs.pop('finetuning_task', None)
-        self.num_labels = kwargs.pop('num_labels', 2)
+        # Attributes with defaults
         self.output_attentions = kwargs.pop('output_attentions', False)
         self.output_hidden_states = kwargs.pop('output_hidden_states', False)
         self.output_past = kwargs.pop('output_past', True)  # Not used by all models
@@ -59,6 +58,22 @@ class PretrainedConfig(object):
         self.pruned_heads = kwargs.pop('pruned_heads', {})
         self.is_decoder = kwargs.pop('is_decoder', False)

+        # Fine-tuning task arguments
+        self.finetuning_task = kwargs.pop('finetuning_task', None)
+        self.num_labels = kwargs.pop('num_labels', 2)
+        self.id2label = kwargs.pop('id2label', {i: 'LABEL_{}'.format(i) for i in range(self.num_labels)})
+        self.id2label = dict((int(key), value) for key, value in self.id2label.items())
+        self.label2id = kwargs.pop('label2id', dict(zip(self.id2label.values(), self.id2label.keys())))
+        self.label2id = dict((key, int(value)) for key, value in self.label2id.items())
+
+        # Additional attributes without default values
+        for key, value in kwargs.items():
+            try:
+                setattr(self, key, value)
+            except AttributeError as err:
+                logger.error("Can't set {} with value {} for {}".format(key, value, self))
+                raise err

     def save_pretrained(self, save_directory):
         """ Save a configuration object to the directory `save_directory`, so that it
             can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method.
@@ -183,17 +198,15 @@ class PretrainedConfig(object):
     @classmethod
     def from_dict(cls, json_object):
         """Constructs a `Config` from a Python dictionary of parameters."""
-        config = cls(vocab_size_or_config_json_file=-1)
-        for key, value in json_object.items():
-            setattr(config, key, value)
-        return config
+        return cls(**json_object)

     @classmethod
     def from_json_file(cls, json_file):
         """Constructs a `Config` from a json file of parameters."""
         with open(json_file, "r", encoding='utf-8') as reader:
             text = reader.read()
-        return cls.from_dict(json.loads(text))
+        dict_obj = json.loads(text)
+        return cls(**dict_obj)

     def __eq__(self, other):
         return self.__dict__ == other.__dict__
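Two behaviours introduced here are worth spelling out: `from_dict`/`from_json_file` now build the config directly from keyword arguments, and `__init__` derives `id2label`/`label2id` defaults from `num_labels`. A short sketch of the resulting defaults, assuming the post-commit behaviour (any concrete subclass such as `BertConfig` inherits it):

from transformers import BertConfig

config = BertConfig(num_labels=3)

# id2label defaults to {i: 'LABEL_{}'.format(i)} and label2id is its inverse,
# exactly as constructed in PretrainedConfig.__init__ above.
assert config.id2label == {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}
assert config.label2id == {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}

# from_json_file now json-decodes the file and returns cls(**dict_obj);
# the path below is illustrative.
# config = BertConfig.from_json_file("config.json")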
transformers/configuration_xlm.py

@@ -42,7 +42,7 @@ class XLMConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `XLMModel`.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XLMModel`.
+        vocab_size: Vocabulary size of `inputs_ids` in `XLMModel`.
         d_model: Size of the encoder layers and the pooler layer.
         n_layer: Number of hidden layers in the Transformer encoder.
         n_head: Number of attention heads for each attention layer in
@@ -81,7 +81,7 @@ class XLMConfig(PretrainedConfig):
     pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30145,
+                 vocab_size=30145,
                  emb_dim=2048,
                  n_layers=12,
                  n_heads=16,
@@ -103,9 +103,6 @@ class XLMConfig(PretrainedConfig):
                  unk_index=3,
                  mask_index=5,
                  is_encoder=True,
-                 finetuning_task=None,
-                 num_labels=2,
                  summary_type='first',
                  summary_use_proj=True,
                  summary_activation=None,
@@ -117,15 +114,7 @@ class XLMConfig(PretrainedConfig):
        """Constructs XLMConfig.
        """
        super(XLMConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_words = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.emb_dim = emb_dim
         self.n_layers = n_layers
         self.n_heads = n_heads
@@ -147,8 +136,6 @@ class XLMConfig(PretrainedConfig):
         self.max_position_embeddings = max_position_embeddings
         self.embed_init_std = embed_init_std
         self.init_std = init_std
-        self.finetuning_task = finetuning_task
-        self.num_labels = num_labels
         self.summary_type = summary_type
         self.summary_use_proj = summary_use_proj
         self.summary_activation = summary_activation
@@ -156,17 +143,14 @@ class XLMConfig(PretrainedConfig):
         self.summary_first_dropout = summary_first_dropout
         self.start_n_top = start_n_top
         self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")

     @property
-    def vocab_size(self):
-        return self.n_words
+    def n_words(self):  # For backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_words = value
+    @n_words.setter
+    def n_words(self, value):  # For backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
transformers/configuration_xlnet.py

@@ -35,7 +35,7 @@ class XLNetConfig(PretrainedConfig):
     """Configuration class to store the configuration of a ``XLNetModel``.

     Args:
-        vocab_size_or_config_json_file: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
+        vocab_size: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
         d_model: Size of the encoder layers and the pooler layer.
         n_layer: Number of hidden layers in the Transformer encoder.
         n_head: Number of attention heads for each attention layer in
@@ -72,28 +72,22 @@ class XLNetConfig(PretrainedConfig):
     pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=32000,
+                 vocab_size=32000,
                  d_model=1024,
                  n_layer=24,
                  n_head=16,
                  d_inner=4096,
                  max_position_embeddings=512,
                  ff_activation="gelu",
                  untie_r=True,
                  attn_type="bi",
                  initializer_range=0.02,
                  layer_norm_eps=1e-12,
                  dropout=0.1,
                  mem_len=None,
                  reuse_len=None,
                  bi_data=False,
                  clamp_len=-1,
                  same_length=False,
-                 finetuning_task=None,
-                 num_labels=2,
                  summary_type='last',
                  summary_use_proj=True,
                  summary_activation='tanh',
@@ -104,15 +98,7 @@ class XLNetConfig(PretrainedConfig):
        """Constructs XLNetConfig.
        """
        super(XLNetConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                setattr(config, key, value)
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_token = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
         self.d_model = d_model
         self.n_layer = n_layer
         self.n_head = n_head
@@ -133,29 +119,24 @@ class XLNetConfig(PretrainedConfig):
         self.clamp_len = clamp_len
         self.same_length = same_length
-        self.finetuning_task = finetuning_task
-        self.num_labels = num_labels
         self.summary_type = summary_type
         self.summary_use_proj = summary_use_proj
         self.summary_activation = summary_activation
         self.summary_last_dropout = summary_last_dropout
         self.start_n_top = start_n_top
         self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")

     @property
     def max_position_embeddings(self):
         return -1

     @property
-    def vocab_size(self):
-        return self.n_token
+    def n_token(self):  # Backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_token = value
+    @n_token.setter
+    def n_token(self, value):  # Backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py

@@ -46,7 +46,7 @@ def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_
     roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
     roberta.eval()  # disable dropout
     config = BertConfig(
-        vocab_size_or_config_json_file=50265,
+        vocab_size=50265,
         hidden_size=roberta.args.encoder_embed_dim,
         num_hidden_layers=roberta.args.encoder_layers,
         num_attention_heads=roberta.args.encoder_attention_heads,
transformers/modeling_gpt2.py

@@ -634,6 +634,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     """
     def __init__(self, config):
         super(GPT2DoubleHeadsModel, self).__init__(config)
+        config.num_labels = 1
         self.transformer = GPT2Model(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         self.multiple_choice_head = SequenceSummary(config)
transformers/modeling_tf_gpt2.py

@@ -574,6 +574,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
     """
     def __init__(self, config, *inputs, **kwargs):
         super(TFGPT2DoubleHeadsModel, self).__init__(config, *inputs, **kwargs)
+        config.num_labels = 1
         self.transformer = TFGPT2MainLayer(config, name='transformer')
         self.multiple_choice_head = TFSequenceSummary(config, initializer_range=config.initializer_range, name='multiple_choice_head')
transformers/modeling_tf_transfo_xl.py

@@ -353,7 +353,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

-        self.n_token = config.n_token
+        self.n_token = config.vocab_size

         self.d_embed = config.d_embed
         self.d_model = config.d_model
@@ -361,7 +361,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
         self.d_head = config.d_head
         self.untie_r = config.untie_r

-        self.word_emb = TFAdaptiveEmbedding(config.n_token, config.d_embed, config.d_model, config.cutoffs,
+        self.word_emb = TFAdaptiveEmbedding(config.vocab_size, config.d_embed, config.d_model, config.cutoffs,
                                             div_val=config.div_val, init_std=config.init_std, name='word_emb')

         self.drop = tf.keras.layers.Dropout(config.dropout)
@@ -729,7 +729,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
             raise NotImplementedError
         # use adaptive softmax (including standard softmax)
         else:
-            self.crit = TFAdaptiveSoftmaxMask(config.n_token, config.d_embed, config.d_model,
+            self.crit = TFAdaptiveSoftmaxMask(config.vocab_size, config.d_embed, config.d_model,
                                               config.cutoffs, div_val=config.div_val, name='crit')

     def reset_length(self, tgt_len, ext_len, mem_len):
transformers/modeling_tf_transfo_xl_utilities.py

@@ -25,15 +25,15 @@ import tensorflow as tf
 from .modeling_tf_utils import shape_list


 class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
-    def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1,
+    def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1,
                  keep_order=False, **kwargs):
         super(TFAdaptiveSoftmaxMask, self).__init__(**kwargs)

-        self.n_token = n_token
+        self.vocab_size = vocab_size
         self.d_embed = d_embed
         self.d_proj = d_proj

-        self.cutoffs = cutoffs + [n_token]
+        self.cutoffs = cutoffs + [vocab_size]
         self.cutoff_ends = [0] + self.cutoffs
         self.div_val = div_val
@@ -66,11 +66,11 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
                 self.out_projs.append(weight)
             else:
                 self.out_projs.append(None)
-            weight = self.add_weight(shape=(self.n_token, self.d_embed,),
+            weight = self.add_weight(shape=(self.vocab_size, self.d_embed,),
                                      initializer='zeros',
                                      trainable=True,
                                      name='out_layers_._{}_._weight'.format(i))
-            bias = self.add_weight(shape=(self.n_token,),
+            bias = self.add_weight(shape=(self.vocab_size,),
                                    initializer='zeros',
                                    trainable=True,
                                    name='out_layers_._{}_._bias'.format(i))
@@ -114,7 +114,7 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
         hidden, target = inputs
         head_logprob = 0
         if self.n_clusters == 0:
-            softmax_b = tf.get_variable('bias', [n_token], initializer=tf.zeros_initializer())
+            softmax_b = tf.get_variable('bias', [self.config.vocab_size], initializer=tf.zeros_initializer())
             output = self._logit(hidden, self.out_layers[0][0], self.out_layers[0][1], self.out_projs[0])
             if target is not None:
                 loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output)
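The call sites shown in the modeling_tf_transfo_xl.py diff above pass the vocabulary size positionally, so they only needed the `config.n_token` to `config.vocab_size` switch; code that used the old keyword name must rename it. A hedged instantiation sketch (the values are illustrative and taken from the Transformer-XL configuration defaults in this diff; requires TensorFlow and a transformers checkout that includes this commit):

import tensorflow as tf  # TensorFlow must be importable for the Keras layer
from transformers.modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask

# After this commit the first parameter is named vocab_size instead of n_token;
# weights are only created lazily when the layer is built/called.
crit = TFAdaptiveSoftmaxMask(vocab_size=267735,
                             d_embed=1024,
                             d_proj=1024,
                             cutoffs=[20000, 40000, 200000],
                             div_val=4,
                             name='crit')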
transformers/modeling_tf_xlnet.py

@@ -366,7 +366,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
         self.use_bfloat16 = config.use_bfloat16
         self.initializer_range = config.initializer_range

-        self.word_embedding = TFSharedEmbeddings(config.n_token, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
+        self.word_embedding = TFSharedEmbeddings(config.vocab_size, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
         self.layer = [TFXLNetLayer(config, name='layer_._{}'.format(i)) for i in range(config.n_layer)]
         self.dropout = tf.keras.layers.Dropout(config.dropout)