chenpangpang / transformers · Commits
"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "48706c7178127e7bcd6cccd90d941801e071a4a2"
Commit 47f0e3cf, authored Dec 13, 2019 by thomwolf

cleaning up configuration classes

Parent: 7296f101
Changes: 43
Showing 20 changed files with 199 additions and 298 deletions (+199, -298)
examples/summarization/configuration_bertabs.py (+5, -5)
templates/adding_a_new_model/configuration_xxx.py (+6, -6)
templates/adding_a_new_model/tests/modeling_tf_xxx_test.py (+1, -1)
templates/adding_a_new_model/tests/modeling_xxx_test.py (+1, -1)
transformers/configuration_albert.py (+3, -3)
transformers/configuration_bert.py (+14, -24)
transformers/configuration_ctrl.py (+4, -19)
transformers/configuration_distilbert.py (+15, -25)
transformers/configuration_gpt2.py (+19, -36)
transformers/configuration_openai.py (+20, -37)
transformers/configuration_transfo_xl.py (+8, -18)
transformers/configuration_utils.py (+20, -7)
transformers/configuration_xlm.py (+36, -52)
transformers/configuration_xlnet.py (+34, -53)
transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py (+1, -1)
transformers/modeling_gpt2.py (+1, -0)
transformers/modeling_tf_gpt2.py (+1, -0)
transformers/modeling_tf_transfo_xl.py (+3, -3)
transformers/modeling_tf_transfo_xl_utilities.py (+6, -6)
transformers/modeling_tf_xlnet.py (+1, -1)
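The common thread across these files is a constructor-argument rename: every configuration class drops vocab_size_or_config_json_file in favour of a plain vocab_size. As a quick sketch of what call sites look like after the change (GPT-2 defaults taken from the hunks below; illustrative, not part of the commit):

from transformers import GPT2Config

# Old call style removed in this commit:
#   GPT2Config(vocab_size_or_config_json_file=50257, n_positions=1024)
# New call style, matching the renamed argument in the hunks below:
config = GPT2Config(vocab_size=50257, n_positions=1024)
assert config.vocab_size == 50257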
examples/summarization/configuration_bertabs.py

@@ -65,7 +65,7 @@ class BertAbsConfig(PretrainedConfig):
    def __init__(self,
-                 vocab_size_or_config_json_file=30522,
+                 vocab_size=30522,
                 max_pos=512,
                 enc_layers=6,
                 enc_hidden_size=512,
@@ -81,14 +81,14 @@ class BertAbsConfig(PretrainedConfig):
                 ):
        super(BertAbsConfig, self).__init__(**kwargs)
-        if self._input_is_path_to_json(vocab_size_or_config_json_file):
-            path_to_json = vocab_size_or_config_json_file
+        if self._input_is_path_to_json(vocab_size):
+            path_to_json = vocab_size
            with open(path_to_json, "r", encoding="utf-8") as reader:
                json_config = json.loads(reader.read())
            for key, value in json_config.items():
                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
+        elif isinstance(vocab_size, int):
+            self.vocab_size = vocab_size
        self.max_pos = max_pos
        self.enc_layers = enc_layers
templates/adding_a_new_model/configuration_xxx.py

@@ -39,7 +39,7 @@ class XxxConfig(PretrainedConfig):
        Arguments:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XxxModel`.
+            vocab_size: Vocabulary size of `inputs_ids` in `XxxModel`.
            hidden_size: Size of the encoder layers and the pooler layer.
            num_hidden_layers: Number of hidden layers in the Transformer encoder.
            num_attention_heads: Number of attention heads for each attention layer in
@@ -64,7 +64,7 @@ class XxxConfig(PretrainedConfig):
    pretrained_config_archive_map = XXX_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
-                 vocab_size_or_config_json_file=50257,
+                 vocab_size=50257,
                 n_positions=1024,
                 n_ctx=1024,
                 n_embd=768,
@@ -84,7 +84,7 @@ class XxxConfig(PretrainedConfig):
                 summary_first_dropout=0.1,
                 **kwargs):
        super(XxxConfig, self).__init__(**kwargs)
-        self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, six.string_types) else -1
+        self.vocab_size = vocab_size if isinstance(vocab_size, six.string_types) else -1
        self.n_ctx = n_ctx
        self.n_positions = n_positions
        self.n_embd = n_embd
@@ -102,12 +102,12 @@ class XxxConfig(PretrainedConfig):
        self.summary_activation = summary_activation
        self.summary_first_dropout = summary_first_dropout
        self.summary_proj_to_labels = summary_proj_to_labels
-        if isinstance(vocab_size_or_config_json_file, six.string_types):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
+        if isinstance(vocab_size, six.string_types):
+            with open(vocab_size, "r", encoding="utf-8") as reader:
                json_config = json.loads(reader.read())
            for key, value in json_config.items():
                self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
+        elif not isinstance(vocab_size, int):
            raise ValueError("First argument must be either a vocabulary size (int)"
                             "or the path to a pretrained model config file (str)")
templates/adding_a_new_model/tests/modeling_tf_xxx_test.py

@@ -111,7 +111,7 @@ class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester):
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = XxxConfig(
-                vocab_size_or_config_json_file=self.vocab_size,
+                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
templates/adding_a_new_model/tests/modeling_xxx_test.py

@@ -109,7 +109,7 @@ class XxxModelTest(CommonTestCases.CommonModelTester):
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = XxxConfig(
-                vocab_size_or_config_json_file=self.vocab_size,
+                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
transformers/configuration_albert.py

@@ -37,7 +37,7 @@ class AlbertConfig(PretrainedConfig):
    pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
-                 vocab_size_or_config_json_file=30000,
+                 vocab_size=30000,
                 embedding_size=128,
                 hidden_size=4096,
                 num_hidden_layers=12,
@@ -83,7 +83,7 @@ class AlbertConfig(PretrainedConfig):
        """
        super(AlbertConfig, self).__init__(**kwargs)
-        self.vocab_size = vocab_size_or_config_json_file
+        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
@@ -97,4 +97,4 @@ class AlbertConfig(PretrainedConfig):
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
-        self.layer_norm_eps = layer_norm_eps
\ No newline at end of file
+        self.layer_norm_eps = layer_norm_eps
transformers/configuration_bert.py

@@ -56,7 +56,7 @@ class BertConfig(PretrainedConfig):
        Arguments:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`.
+            vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
            hidden_size: Size of the encoder layers and the pooler layer.
            num_hidden_layers: Number of hidden layers in the Transformer encoder.
            num_attention_heads: Number of attention heads for each attention layer in
@@ -81,7 +81,7 @@ class BertConfig(PretrainedConfig):
    pretrained_config_archive_map = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
-                 vocab_size_or_config_json_file=30522,
+                 vocab_size=30522,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
@@ -95,25 +95,15 @@ class BertConfig(PretrainedConfig):
                 layer_norm_eps=1e-12,
                 **kwargs):
        super(BertConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.hidden_size = hidden_size
-            self.num_hidden_layers = num_hidden_layers
-            self.num_attention_heads = num_attention_heads
-            self.hidden_act = hidden_act
-            self.intermediate_size = intermediate_size
-            self.hidden_dropout_prob = hidden_dropout_prob
-            self.attention_probs_dropout_prob = attention_probs_dropout_prob
-            self.max_position_embeddings = max_position_embeddings
-            self.type_vocab_size = type_vocab_size
-            self.initializer_range = initializer_range
-            self.layer_norm_eps = layer_norm_eps
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_act = hidden_act
+        self.intermediate_size = intermediate_size
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.initializer_range = initializer_range
+        self.layer_norm_eps = layer_norm_eps
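With the str/unicode branch removed, BertConfig.__init__ no longer accepts a path to a JSON config file as its first argument; file loading is meant to go through the PretrainedConfig classmethods (see the configuration_utils.py hunk further down). A minimal sketch of the supported path, using a placeholder file name:

import json
from transformers import BertConfig

# Write out a tiny config and read it back the supported way.
with open("bert_config.json", "w", encoding="utf-8") as writer:
    json.dump({"vocab_size": 30522, "hidden_size": 768}, writer)

config = BertConfig.from_json_file("bert_config.json")
assert config.vocab_size == 30522 and config.hidden_size == 768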
transformers/configuration_ctrl.py

@@ -31,7 +31,7 @@ class CTRLConfig(PretrainedConfig):
    """Configuration class to store the configuration of a `CTRLModel`.
    Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
        n_positions: Number of positional embeddings.
        n_ctx: Size of the causal mask (usually same as n_positions).
        dff: Size of the inner dimension of the FFN.
@@ -52,7 +52,7 @@ class CTRLConfig(PretrainedConfig):
    def __init__(self,
-                 vocab_size_or_config_json_file=246534,
+                 vocab_size=246534,
                 n_positions=256,
                 n_ctx=256,
                 n_embd=1280,
@@ -64,8 +64,6 @@ class CTRLConfig(PretrainedConfig):
                 attn_pdrop=0.1,
                 layer_norm_epsilon=1e-6,
                 initializer_range=0.02,
-                 num_labels=1,
                 summary_type='cls_index',
                 summary_use_proj=True,
                 summary_activation=None,
@@ -76,7 +74,7 @@ class CTRLConfig(PretrainedConfig):
        """Constructs CTRLConfig.
        Args:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
+            vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file.
            n_positions: Number of positional embeddings.
            n_ctx: Size of the causal mask (usually same as n_positions).
            dff: Size of the inner dimension of the FFN.
@@ -94,8 +92,7 @@ class CTRLConfig(PretrainedConfig):
                initializing all weight matrices.
        """
        super(CTRLConfig, self).__init__(**kwargs)
-        self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1
+        self.vocab_size = vocab_size
        self.n_ctx = n_ctx
        self.n_positions = n_positions
        self.n_embd = n_embd
@@ -108,23 +105,11 @@ class CTRLConfig(PretrainedConfig):
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
-        self.num_labels = num_labels
        self.summary_type = summary_type
        self.summary_use_proj = summary_use_proj
        self.summary_activation = summary_activation
        self.summary_first_dropout = summary_first_dropout
        self.summary_proj_to_labels = summary_proj_to_labels
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")

    @property
    def max_position_embeddings(self):
transformers/configuration_distilbert.py

@@ -37,7 +37,7 @@ class DistilBertConfig(PretrainedConfig):
    pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
-                 vocab_size_or_config_json_file=30522,
+                 vocab_size=30522,
                 max_position_embeddings=512,
                 sinusoidal_pos_embds=False,
                 n_layers=6,
@@ -53,31 +53,21 @@ class DistilBertConfig(PretrainedConfig):
                 seq_classif_dropout=0.2,
                 **kwargs):
        super(DistilBertConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.max_position_embeddings = max_position_embeddings
-            self.sinusoidal_pos_embds = sinusoidal_pos_embds
-            self.n_layers = n_layers
-            self.n_heads = n_heads
-            self.dim = dim
-            self.hidden_dim = hidden_dim
-            self.dropout = dropout
-            self.attention_dropout = attention_dropout
-            self.activation = activation
-            self.initializer_range = initializer_range
-            self.tie_weights_ = tie_weights_
-            self.qa_dropout = qa_dropout
-            self.seq_classif_dropout = seq_classif_dropout
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.sinusoidal_pos_embds = sinusoidal_pos_embds
+        self.n_layers = n_layers
+        self.n_heads = n_heads
+        self.dim = dim
+        self.hidden_dim = hidden_dim
+        self.dropout = dropout
+        self.attention_dropout = attention_dropout
+        self.activation = activation
+        self.initializer_range = initializer_range
+        self.tie_weights_ = tie_weights_
+        self.qa_dropout = qa_dropout
+        self.seq_classif_dropout = seq_classif_dropout

    @property
    def hidden_size(self):
        return self.dim
transformers/configuration_gpt2.py

@@ -36,7 +36,7 @@ class GPT2Config(PretrainedConfig):
    """Configuration class to store the configuration of a `GPT2Model`.
    Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
        n_positions: Number of positional embeddings.
        n_ctx: Size of the causal mask (usually same as n_positions).
        n_embd: Dimensionality of the embeddings and hidden states.
@@ -56,7 +56,7 @@ class GPT2Config(PretrainedConfig):
    def __init__(self,
-                 vocab_size_or_config_json_file=50257,
+                 vocab_size=50257,
                 n_positions=1024,
                 n_ctx=1024,
                 n_embd=768,
@@ -67,8 +67,6 @@ class GPT2Config(PretrainedConfig):
                 attn_pdrop=0.1,
                 layer_norm_epsilon=1e-5,
                 initializer_range=0.02,
-                 num_labels=1,
                 summary_type='cls_index',
                 summary_use_proj=True,
                 summary_activation=None,
@@ -79,7 +77,7 @@ class GPT2Config(PretrainedConfig):
        """Constructs GPT2Config.
        Args:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
+            vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
            n_positions: Number of positional embeddings.
            n_ctx: Size of the causal mask (usually same as n_positions).
            n_embd: Dimensionality of the embeddings and hidden states.
@@ -96,37 +94,22 @@ class GPT2Config(PretrainedConfig):
                initializing all weight matrices.
        """
        super(GPT2Config, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.n_ctx = n_ctx
-            self.n_positions = n_positions
-            self.n_embd = n_embd
-            self.n_layer = n_layer
-            self.n_head = n_head
-            self.resid_pdrop = resid_pdrop
-            self.embd_pdrop = embd_pdrop
-            self.attn_pdrop = attn_pdrop
-            self.layer_norm_epsilon = layer_norm_epsilon
-            self.initializer_range = initializer_range
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_first_dropout = summary_first_dropout
-            self.summary_proj_to_labels = summary_proj_to_labels
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.n_ctx = n_ctx
+        self.n_positions = n_positions
+        self.n_embd = n_embd
+        self.n_layer = n_layer
+        self.n_head = n_head
+        self.resid_pdrop = resid_pdrop
+        self.embd_pdrop = embd_pdrop
+        self.attn_pdrop = attn_pdrop
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.initializer_range = initializer_range
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_first_dropout = summary_first_dropout
+        self.summary_proj_to_labels = summary_proj_to_labels

    @property
    def max_position_embeddings(self):
transformers/configuration_openai.py

@@ -35,7 +35,7 @@ class OpenAIGPTConfig(PretrainedConfig):
    Configuration class to store the configuration of a `OpenAIGPTModel`.
    Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file.
        n_positions: Number of positional embeddings.
        n_ctx: Size of the causal mask (usually same as n_positions).
        n_embd: Dimensionality of the embeddings and hidden states.
@@ -58,7 +58,7 @@ class OpenAIGPTConfig(PretrainedConfig):
    def __init__(self,
-                 vocab_size_or_config_json_file=40478,
+                 vocab_size=40478,
                 n_positions=512,
                 n_ctx=512,
                 n_embd=768,
@@ -71,8 +71,6 @@ class OpenAIGPTConfig(PretrainedConfig):
                 layer_norm_epsilon=1e-5,
                 initializer_range=0.02,
                 predict_special_tokens=True,
-                 num_labels=1,
                 summary_type='cls_index',
                 summary_use_proj=True,
                 summary_activation=None,
@@ -83,39 +81,24 @@ class OpenAIGPTConfig(PretrainedConfig):
        """Constructs OpenAIGPTConfig.
        """
        super(OpenAIGPTConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.n_ctx = n_ctx
-            self.n_positions = n_positions
-            self.n_embd = n_embd
-            self.n_layer = n_layer
-            self.n_head = n_head
-            self.afn = afn
-            self.resid_pdrop = resid_pdrop
-            self.embd_pdrop = embd_pdrop
-            self.attn_pdrop = attn_pdrop
-            self.layer_norm_epsilon = layer_norm_epsilon
-            self.initializer_range = initializer_range
-            self.predict_special_tokens = predict_special_tokens
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_first_dropout = summary_first_dropout
-            self.summary_proj_to_labels = summary_proj_to_labels
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.n_ctx = n_ctx
+        self.n_positions = n_positions
+        self.n_embd = n_embd
+        self.n_layer = n_layer
+        self.n_head = n_head
+        self.afn = afn
+        self.resid_pdrop = resid_pdrop
+        self.embd_pdrop = embd_pdrop
+        self.attn_pdrop = attn_pdrop
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.initializer_range = initializer_range
+        self.predict_special_tokens = predict_special_tokens
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_first_dropout = summary_first_dropout
+        self.summary_proj_to_labels = summary_proj_to_labels

    @property
    def max_position_embeddings(self):
transformers/configuration_transfo_xl.py

@@ -34,7 +34,7 @@ class TransfoXLConfig(PretrainedConfig):
    """Configuration class to store the configuration of a `TransfoXLModel`.
    Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file.
+        vocab_size: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file.
        cutoffs: cutoffs for the adaptive softmax
        d_model: Dimensionality of the model's hidden states.
        d_embed: Dimensionality of the embeddings
@@ -68,7 +68,7 @@ class TransfoXLConfig(PretrainedConfig):
    pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
-                 vocab_size_or_config_json_file=267735,
+                 vocab_size=267735,
                 cutoffs=[20000, 40000, 200000],
                 d_model=1024,
                 d_embed=1024,
@@ -100,7 +100,7 @@ class TransfoXLConfig(PretrainedConfig):
        """Constructs TransfoXLConfig.
        """
        super(TransfoXLConfig, self).__init__(**kwargs)
-        self.n_token = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1
+        self.vocab_size = vocab_size
        self.cutoffs = []
        self.cutoffs.extend(cutoffs)
        self.tie_weight = tie_weight
@@ -133,27 +133,17 @@ class TransfoXLConfig(PretrainedConfig):
        self.init_std = init_std
        self.layer_norm_epsilon = layer_norm_epsilon
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif not isinstance(vocab_size_or_config_json_file, int):
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")

    @property
    def max_position_embeddings(self):
        return self.tgt_len + self.ext_len + self.mem_len

    @property
-    def vocab_size(self):
-        return self.n_token
+    def n_token(self):
+        # Backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_token = value
+    @n_token.setter
+    def n_token(self, value):
+        # Backward compatibility
+        self.vocab_size = value

    @property
    def hidden_size(self):
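The property swap above is what keeps old config.n_token readers and writers working while vocab_size becomes the stored attribute. A self-contained sketch of the same aliasing pattern (a stand-in class, not the real TransfoXLConfig):

class _AliasDemo(object):
    def __init__(self, vocab_size=267735):
        self.vocab_size = vocab_size  # canonical attribute after the cleanup

    @property
    def n_token(self):
        # Backward compatibility: old code reading cfg.n_token still works
        return self.vocab_size

    @n_token.setter
    def n_token(self, value):
        # Backward compatibility: old code writing cfg.n_token updates vocab_size
        self.vocab_size = value

cfg = _AliasDemo()
cfg.n_token = 1000
assert cfg.vocab_size == 1000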
transformers/configuration_utils.py

@@ -49,8 +49,7 @@ class PretrainedConfig(object):
    pretrained_config_archive_map = {}

    def __init__(self, **kwargs):
-        self.finetuning_task = kwargs.pop('finetuning_task', None)
-        self.num_labels = kwargs.pop('num_labels', 2)
+        # Attributes with defaults
        self.output_attentions = kwargs.pop('output_attentions', False)
        self.output_hidden_states = kwargs.pop('output_hidden_states', False)
        self.output_past = kwargs.pop('output_past', True)  # Not used by all models
@@ -59,6 +58,22 @@ class PretrainedConfig(object):
        self.pruned_heads = kwargs.pop('pruned_heads', {})
        self.is_decoder = kwargs.pop('is_decoder', False)

+        # Fine-tuning task arguments
+        self.finetuning_task = kwargs.pop('finetuning_task', None)
+        self.num_labels = kwargs.pop('num_labels', 2)
+        self.id2label = kwargs.pop('id2label', {i: 'LABEL_{}'.format(i) for i in range(self.num_labels)})
+        self.id2label = dict((int(key), value) for key, value in self.id2label.items())
+        self.label2id = kwargs.pop('label2id', dict(zip(self.id2label.values(), self.id2label.keys())))
+        self.label2id = dict((key, int(value)) for key, value in self.label2id.items())
+
+        # Additional attributes without default values
+        for key, value in kwargs.items():
+            try:
+                setattr(self, key, value)
+            except AttributeError as err:
+                logger.error("Can't set {} with value {} for {}".format(key, value, self))
+                raise err
+
    def save_pretrained(self, save_directory):
        """ Save a configuration object to the directory `save_directory`, so that it
            can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method.
@@ -183,17 +198,15 @@ class PretrainedConfig(object):
    @classmethod
    def from_dict(cls, json_object):
        """Constructs a `Config` from a Python dictionary of parameters."""
-        config = cls(vocab_size_or_config_json_file=-1)
-        for key, value in json_object.items():
-            setattr(config, key, value)
-        return config
+        return cls(**json_object)

    @classmethod
    def from_json_file(cls, json_file):
        """Constructs a `Config` from a json file of parameters."""
        with open(json_file, "r", encoding='utf-8') as reader:
            text = reader.read()
-        return cls.from_dict(json.loads(text))
+        dict_obj = json.loads(text)
+        return cls(**dict_obj)

    def __eq__(self, other):
        return self.__dict__ == other.__dict__
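Because the task-related attributes are now popped from **kwargs in PretrainedConfig.__init__, every subclass inherits the same defaults for num_labels, id2label and label2id, and from_dict/from_json_file reduce to re-instantiating the class from keyword arguments. A rough sketch of the resulting behaviour (values follow the added lines above; treat it as illustrative):

from transformers import BertConfig

config = BertConfig(vocab_size=30522, num_labels=3)
# num_labels is popped by PretrainedConfig.__init__ and used to build default label maps:
print(config.num_labels)   # 3
print(config.id2label)     # {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}
print(config.label2id)     # {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}

# from_dict now simply re-instantiates the class from keyword arguments:
round_tripped = BertConfig.from_dict(config.to_dict())
assert round_tripped == config  # PretrainedConfig defines __eq__ on __dict__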
transformers/configuration_xlm.py

@@ -42,7 +42,7 @@ class XLMConfig(PretrainedConfig):
    """Configuration class to store the configuration of a `XLMModel`.
    Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XLMModel`.
+        vocab_size: Vocabulary size of `inputs_ids` in `XLMModel`.
        d_model: Size of the encoder layers and the pooler layer.
        n_layer: Number of hidden layers in the Transformer encoder.
        n_head: Number of attention heads for each attention layer in
@@ -81,7 +81,7 @@ class XLMConfig(PretrainedConfig):
    pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
-                 vocab_size_or_config_json_file=30145,
+                 vocab_size=30145,
                 emb_dim=2048,
                 n_layers=12,
                 n_heads=16,
@@ -103,9 +103,6 @@ class XLMConfig(PretrainedConfig):
                 unk_index=3,
                 mask_index=5,
                 is_encoder=True,
-                 finetuning_task=None,
-                 num_labels=2,
                 summary_type='first',
                 summary_use_proj=True,
                 summary_activation=None,
@@ -117,56 +114,43 @@ class XLMConfig(PretrainedConfig):
        """Constructs XLMConfig.
        """
        super(XLMConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_words = vocab_size_or_config_json_file
-            self.emb_dim = emb_dim
-            self.n_layers = n_layers
-            self.n_heads = n_heads
-            self.dropout = dropout
-            self.attention_dropout = attention_dropout
-            self.gelu_activation = gelu_activation
-            self.sinusoidal_embeddings = sinusoidal_embeddings
-            self.causal = causal
-            self.asm = asm
-            self.n_langs = n_langs
-            self.use_lang_emb = use_lang_emb
-            self.layer_norm_eps = layer_norm_eps
-            self.bos_index = bos_index
-            self.eos_index = eos_index
-            self.pad_index = pad_index
-            self.unk_index = unk_index
-            self.mask_index = mask_index
-            self.is_encoder = is_encoder
-            self.max_position_embeddings = max_position_embeddings
-            self.embed_init_std = embed_init_std
-            self.init_std = init_std
-            self.finetuning_task = finetuning_task
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_proj_to_labels = summary_proj_to_labels
-            self.summary_first_dropout = summary_first_dropout
-            self.start_n_top = start_n_top
-            self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.emb_dim = emb_dim
+        self.n_layers = n_layers
+        self.n_heads = n_heads
+        self.dropout = dropout
+        self.attention_dropout = attention_dropout
+        self.gelu_activation = gelu_activation
+        self.sinusoidal_embeddings = sinusoidal_embeddings
+        self.causal = causal
+        self.asm = asm
+        self.n_langs = n_langs
+        self.use_lang_emb = use_lang_emb
+        self.layer_norm_eps = layer_norm_eps
+        self.bos_index = bos_index
+        self.eos_index = eos_index
+        self.pad_index = pad_index
+        self.unk_index = unk_index
+        self.mask_index = mask_index
+        self.is_encoder = is_encoder
+        self.max_position_embeddings = max_position_embeddings
+        self.embed_init_std = embed_init_std
+        self.init_std = init_std
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_proj_to_labels = summary_proj_to_labels
+        self.summary_first_dropout = summary_first_dropout
+        self.start_n_top = start_n_top
+        self.end_n_top = end_n_top

    @property
-    def vocab_size(self):
-        return self.n_words
+    def n_words(self):
+        # For backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_words = value
+    @n_words.setter
+    def n_words(self, value):
+        # For backward compatibility
+        self.vocab_size = value

    @property
    def hidden_size(self):
transformers/configuration_xlnet.py

@@ -35,7 +35,7 @@ class XLNetConfig(PretrainedConfig):
    """Configuration class to store the configuration of a ``XLNetModel``.
    Args:
-        vocab_size_or_config_json_file: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
+        vocab_size: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
        d_model: Size of the encoder layers and the pooler layer.
        n_layer: Number of hidden layers in the Transformer encoder.
        n_head: Number of attention heads for each attention layer in
@@ -72,28 +72,22 @@ class XLNetConfig(PretrainedConfig):
    pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
-                 vocab_size_or_config_json_file=32000,
+                 vocab_size=32000,
                 d_model=1024,
                 n_layer=24,
                 n_head=16,
                 d_inner=4096,
                 max_position_embeddings=512,
                 ff_activation="gelu",
                 untie_r=True,
                 attn_type="bi",
                 initializer_range=0.02,
                 layer_norm_eps=1e-12,
                 dropout=0.1,
                 mem_len=None,
                 reuse_len=None,
                 bi_data=False,
                 clamp_len=-1,
                 same_length=False,
-                 finetuning_task=None,
-                 num_labels=2,
                 summary_type='last',
                 summary_use_proj=True,
                 summary_activation='tanh',
@@ -104,58 +98,45 @@ class XLNetConfig(PretrainedConfig):
        """Constructs XLNetConfig.
        """
        super(XLNetConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                setattr(config, key, value)
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_token = vocab_size_or_config_json_file
-            self.d_model = d_model
-            self.n_layer = n_layer
-            self.n_head = n_head
-            assert d_model % n_head == 0
-            self.d_head = d_model // n_head
-            self.ff_activation = ff_activation
-            self.d_inner = d_inner
-            self.untie_r = untie_r
-            self.attn_type = attn_type
-            self.initializer_range = initializer_range
-            self.layer_norm_eps = layer_norm_eps
-            self.dropout = dropout
-            self.mem_len = mem_len
-            self.reuse_len = reuse_len
-            self.bi_data = bi_data
-            self.clamp_len = clamp_len
-            self.same_length = same_length
-            self.finetuning_task = finetuning_task
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_last_dropout = summary_last_dropout
-            self.start_n_top = start_n_top
-            self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.d_model = d_model
+        self.n_layer = n_layer
+        self.n_head = n_head
+        assert d_model % n_head == 0
+        self.d_head = d_model // n_head
+        self.ff_activation = ff_activation
+        self.d_inner = d_inner
+        self.untie_r = untie_r
+        self.attn_type = attn_type
+        self.initializer_range = initializer_range
+        self.layer_norm_eps = layer_norm_eps
+        self.dropout = dropout
+        self.mem_len = mem_len
+        self.reuse_len = reuse_len
+        self.bi_data = bi_data
+        self.clamp_len = clamp_len
+        self.same_length = same_length
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_last_dropout = summary_last_dropout
+        self.start_n_top = start_n_top
+        self.end_n_top = end_n_top

    @property
    def max_position_embeddings(self):
        return -1

    @property
-    def vocab_size(self):
-        return self.n_token
+    def n_token(self):
+        # Backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_token = value
+    @n_token.setter
+    def n_token(self, value):
+        # Backward compatibility
+        self.vocab_size = value

    @property
    def hidden_size(self):
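Both the removed and the added assignment block keep the head-size invariant d_model % n_head == 0. With the defaults in this hunk the arithmetic works out as follows (plain illustration, not part of the commit):

d_model, n_head = 1024, 16
assert d_model % n_head == 0      # required by the config
d_head = d_model // n_head        # 1024 // 16 == 64
print(d_head)                     # 64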
transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py

@@ -46,7 +46,7 @@ def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_
    roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
    roberta.eval()  # disable dropout
    config = BertConfig(
-        vocab_size_or_config_json_file=50265,
+        vocab_size=50265,
        hidden_size=roberta.args.encoder_embed_dim,
        num_hidden_layers=roberta.args.encoder_layers,
        num_attention_heads=roberta.args.encoder_attention_heads,
transformers/modeling_gpt2.py

@@ -634,6 +634,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
    """
    def __init__(self, config):
        super(GPT2DoubleHeadsModel, self).__init__(config)
+        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.multiple_choice_head = SequenceSummary(config)
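Since num_labels=1 is no longer a GPT2Config default, the double-heads model pins it before building its multiple-choice head; with summary_use_proj and summary_proj_to_labels enabled (the GPT-2 defaults shown earlier in this diff), SequenceSummary then projects the summarised hidden state to a single score per choice. A rough shape sketch under that assumption (hypothetical tensors, not the library code):

import torch
import torch.nn as nn

n_embd, num_labels = 768, 1                    # values pinned in GPT2DoubleHeadsModel.__init__
summary_proj = nn.Linear(n_embd, num_labels)   # stand-in for the projection SequenceSummary builds
hidden = torch.randn(2, 4, n_embd)             # (batch, num_choices, hidden) summary per choice
mc_logits = summary_proj(hidden).squeeze(-1)
print(mc_logits.shape)                         # torch.Size([2, 4]) -> one score per choice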
transformers/modeling_tf_gpt2.py

@@ -574,6 +574,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
    """
    def __init__(self, config, *inputs, **kwargs):
        super(TFGPT2DoubleHeadsModel, self).__init__(config, *inputs, **kwargs)
+        config.num_labels = 1
        self.transformer = TFGPT2MainLayer(config, name='transformer')
        self.multiple_choice_head = TFSequenceSummary(config, initializer_range=config.initializer_range, name='multiple_choice_head')
transformers/modeling_tf_transfo_xl.py

@@ -353,7 +353,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
        self.output_attentions = config.output_attentions
        self.output_hidden_states = config.output_hidden_states

-        self.n_token = config.n_token
+        self.n_token = config.vocab_size

        self.d_embed = config.d_embed
        self.d_model = config.d_model
@@ -361,7 +361,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
        self.d_head = config.d_head
        self.untie_r = config.untie_r

-        self.word_emb = TFAdaptiveEmbedding(config.n_token, config.d_embed, config.d_model, config.cutoffs,
+        self.word_emb = TFAdaptiveEmbedding(config.vocab_size, config.d_embed, config.d_model, config.cutoffs,
                                            div_val=config.div_val, init_std=config.init_std, name='word_emb')

        self.drop = tf.keras.layers.Dropout(config.dropout)
@@ -729,7 +729,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
            raise NotImplementedError
        # use adaptive softmax (including standard softmax)
        else:
-            self.crit = TFAdaptiveSoftmaxMask(config.n_token, config.d_embed, config.d_model,
+            self.crit = TFAdaptiveSoftmaxMask(config.vocab_size, config.d_embed, config.d_model,
                                              config.cutoffs, div_val=config.div_val, name='crit')

    def reset_length(self, tgt_len, ext_len, mem_len):
transformers/modeling_tf_transfo_xl_utilities.py

@@ -25,15 +25,15 @@ import tensorflow as tf
from .modeling_tf_utils import shape_list


class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
-    def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1,
+    def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1,
                 keep_order=False, **kwargs):
        super(TFAdaptiveSoftmaxMask, self).__init__(**kwargs)

-        self.n_token = n_token
+        self.vocab_size = vocab_size
        self.d_embed = d_embed
        self.d_proj = d_proj

-        self.cutoffs = cutoffs + [n_token]
+        self.cutoffs = cutoffs + [vocab_size]
        self.cutoff_ends = [0] + self.cutoffs
        self.div_val = div_val
@@ -66,11 +66,11 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
                    self.out_projs.append(weight)
                else:
                    self.out_projs.append(None)
-                weight = self.add_weight(shape=(self.n_token, self.d_embed,),
+                weight = self.add_weight(shape=(self.vocab_size, self.d_embed,),
                                         initializer='zeros',
                                         trainable=True,
                                         name='out_layers_._{}_._weight'.format(i))
-                bias = self.add_weight(shape=(self.n_token,),
+                bias = self.add_weight(shape=(self.vocab_size,),
                                       initializer='zeros',
                                       trainable=True,
                                       name='out_layers_._{}_._bias'.format(i))
@@ -114,7 +114,7 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
        hidden, target = inputs
        head_logprob = 0
        if self.n_clusters == 0:
-            softmax_b = tf.get_variable('bias', [n_token], initializer=tf.zeros_initializer())
+            softmax_b = tf.get_variable('bias', [self.config.vocab_size], initializer=tf.zeros_initializer())
            output = self._logit(hidden, self.out_layers[0][0], self.out_layers[0][1], self.out_projs[0])
            if target is not None:
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output)
transformers/modeling_tf_xlnet.py

@@ -366,7 +366,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
        self.use_bfloat16 = config.use_bfloat16
        self.initializer_range = config.initializer_range

-        self.word_embedding = TFSharedEmbeddings(config.n_token, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
+        self.word_embedding = TFSharedEmbeddings(config.vocab_size, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
        self.layer = [TFXLNetLayer(config, name='layer_._{}'.format(i)) for i in range(config.n_layer)]
        self.dropout = tf.keras.layers.Dropout(config.dropout)