chenpangpang / transformers - Commit e468192e

Merge branch 'pytorch-transformers' into xlnet
Authored Jul 09, 2019 by thomwolf
Parents: 9dd2c860, 4ce237c8

Changes: 84 files changed in total; this page shows 20 changed files with 124 additions and 88 deletions (+124 / -88).
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py  +4 -5
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py  +3 -3
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py  +1 -1
pytorch_transformers/file_utils.py  +1 -1
pytorch_transformers/modeling_bert.py  +4 -5
pytorch_transformers/modeling_gpt2.py  +13 -10
pytorch_transformers/modeling_openai.py  +14 -11
pytorch_transformers/modeling_transfo_xl.py  +5 -6
pytorch_transformers/modeling_transfo_xl_utilities.py  +0 -0
pytorch_transformers/modeling_utils.py  +53 -16
pytorch_transformers/modeling_xlm.py  +13 -16
pytorch_transformers/modeling_xlnet.py  +8 -9
pytorch_transformers/optimization.py  +0 -0
pytorch_transformers/optimization_openai.py  +0 -0
pytorch_transformers/tests/__init__.py  +0 -0
pytorch_transformers/tests/conftest.py  +0 -0
pytorch_transformers/tests/fixtures/input.txt  +0 -0
pytorch_transformers/tests/fixtures/sample_text.txt  +0 -0
pytorch_transformers/tests/fixtures/test_sentencepiece.model  +0 -0
pytorch_transformers/tests/modeling_bert_test.py  +5 -5
pytorch_pretrained_bert/convert_transfo_xl_checkpoint_to_pytorch.py → pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py

@@ -23,14 +23,13 @@ from io import open
 import torch

-import pytorch_pretrained_bert.tokenization_transfo_xl as data_utils
+import pytorch_transformers.tokenization_transfo_xl as data_utils
-from pytorch_pretrained_bert.modeling_transfo_xl import (CONFIG_NAME,
+from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME,
                                                        WEIGHTS_NAME,
                                                        TransfoXLConfig,
                                                        TransfoXLLMHeadModel,
                                                        load_tf_weights_in_transfo_xl)
-from pytorch_pretrained_bert.tokenization_transfo_xl import (CORPUS_NAME,
-                                                             VOCAB_NAME)
+from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME,
+                                                          VOCAB_FILES_NAMES)

 if sys.version_info[0] == 2:
     import cPickle as pickle

@@ -53,7 +52,7 @@ def convert_transfo_xl_checkpoint_to_pytorch(tf_checkpoint_path,
         with open(transfo_xl_dataset_file, "rb") as fp:
             corpus = pickle.load(fp, encoding="latin1")
         # Save vocabulary and dataset cache as Dictionaries (should be better than pickles for the long-term)
-        pytorch_vocab_dump_path = pytorch_dump_folder_path + '/' + VOCAB_NAME
+        pytorch_vocab_dump_path = pytorch_dump_folder_path + '/' + VOCAB_FILES_NAMES['pretrained_vocab_file']
         print("Save vocabulary to {}".format(pytorch_vocab_dump_path))
         corpus_vocab_dict = corpus.vocab.__dict__
         torch.save(corpus_vocab_dict, pytorch_vocab_dump_path)
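The single VOCAB_NAME constant is replaced by a lookup into the VOCAB_FILES_NAMES mapping under the 'pretrained_vocab_file' key. A minimal sketch of the resulting path construction; the mapping value below is a hypothetical stand-in, not taken from this diff:

```python
# Hypothetical value for illustration only; the real mapping lives in
# pytorch_transformers/tokenization_transfo_xl.py and may use a different file name.
VOCAB_FILES_NAMES = {'pretrained_vocab_file': 'vocab.bin'}

pytorch_dump_folder_path = './transfo-xl-dump'
# Mirrors the updated line in the conversion script:
pytorch_vocab_dump_path = pytorch_dump_folder_path + '/' + VOCAB_FILES_NAMES['pretrained_vocab_file']
print(pytorch_vocab_dump_path)  # ./transfo-xl-dump/vocab.bin
```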
pytorch_pretrained_bert/convert_xlm_checkpoint_to_pytorch.py → pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py

@@ -23,8 +23,8 @@ from io import open
 import torch
 import numpy

-from pytorch_pretrained_bert.modeling_xlm import (CONFIG_NAME, WEIGHTS_NAME, XLMConfig, XLMModel)
+from pytorch_transformers.modeling_xlm import (CONFIG_NAME, WEIGHTS_NAME, XLMConfig, XLMModel)
-from pytorch_pretrained_bert.tokenization_xlm import MERGES_NAME, VOCAB_NAME
+from pytorch_transformers.tokenization_xlm import VOCAB_FILES_NAMES

 def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_path):

@@ -42,7 +42,7 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
     # Save pytorch-model
     pytorch_weights_dump_path = pytorch_dump_folder_path + '/' + WEIGHTS_NAME
     pytorch_config_dump_path = pytorch_dump_folder_path + '/' + CONFIG_NAME
-    pytorch_vocab_dump_path = pytorch_dump_folder_path + '/' + VOCAB_NAME
+    pytorch_vocab_dump_path = pytorch_dump_folder_path + '/' + VOCAB_FILES_NAMES['vocab_file']

     print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
     torch.save(model, pytorch_weights_dump_path)
pytorch_pretrained_bert/convert_xlnet_checkpoint_to_pytorch.py → pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py

@@ -22,7 +22,7 @@ import os
 import argparse
 import torch

-from pytorch_pretrained_bert.modeling_xlnet import (CONFIG_NAME, WEIGHTS_NAME,
+from pytorch_transformers.modeling_xlnet import (CONFIG_NAME, WEIGHTS_NAME,
                                                   XLNetConfig,
                                                   XLNetLMHeadModel, XLNetForQuestionAnswering,
                                                   XLNetForSequenceClassification,
pytorch_pretrained_bert/file_utils.py → pytorch_transformers/file_utils.py

@@ -29,7 +29,7 @@ except ImportError:
 torch_cache_home = os.path.expanduser(
     os.getenv('TORCH_HOME', os.path.join(
         os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch')))
-default_cache_path = os.path.join(torch_cache_home, 'pytorch_pretrained_bert')
+default_cache_path = os.path.join(torch_cache_home, 'pytorch_transformers')

 try:
     from urllib.parse import urlparse
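Only the leaf directory name changes here; the lookup chain (TORCH_HOME, then XDG_CACHE_HOME/torch, then ~/.cache/torch) stays the same. A standalone sketch of how the default cache path now resolves:

```python
import os

# Same fallback chain as the surrounding code in file_utils.py:
# $TORCH_HOME, else $XDG_CACHE_HOME/torch, else ~/.cache/torch,
# with the package-specific leaf directory renamed by this commit.
torch_cache_home = os.path.expanduser(
    os.getenv('TORCH_HOME', os.path.join(
        os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch')))
default_cache_path = os.path.join(torch_cache_home, 'pytorch_transformers')

print(default_cache_path)  # e.g. /home/<user>/.cache/torch/pytorch_transformers
```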
pytorch_pretrained_bert/modeling_bert.py → pytorch_transformers/modeling_bert.py

@@ -28,12 +28,11 @@ import torch
 from torch import nn
 from torch.nn import CrossEntropyLoss, MSELoss

-from .file_utils import cached_path
-from .modeling_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer
+from .model_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer

 logger = logging.getLogger(__name__)

-PRETRAINED_MODEL_ARCHIVE_MAP = {
+BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
     'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin",
     'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-pytorch_model.bin",
     'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin",

@@ -49,7 +48,7 @@ PRETRAINED_MODEL_ARCHIVE_MAP = {
     'bert-base-cased-finetuned-mrpc': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-pytorch_model.bin",
 }

-PRETRAINED_CONFIG_ARCHIVE_MAP = {
+BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json",
     'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-config.json",
     'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json",

@@ -545,7 +544,7 @@ class BertPreTrainedModel(PreTrainedModel):
        a simple interface for dowloading and loading pretrained models.
     """
     config_class = BertConfig
-    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    pretrained_model_archive_map = BERT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_bert
     base_model_prefix = "bert"
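The archive maps gain a model-type prefix (BERT_...), which avoids name clashes now that several modeling files share one package. A small sketch listing the shortcut names the renamed map defines, assuming the package layout from this commit:

```python
# Sketch only; assumes the pytorch_transformers package from this commit is importable.
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP

for shortcut_name, url in BERT_PRETRAINED_MODEL_ARCHIVE_MAP.items():
    print(shortcut_name, '->', url)
```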
pytorch_pretrained_bert/modeling_gpt2.py → pytorch_transformers/modeling_gpt2.py

@@ -30,16 +30,15 @@ import torch.nn as nn
 from torch.nn import CrossEntropyLoss
 from torch.nn.parameter import Parameter

-from .file_utils import cached_path
-from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
+from .model_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
                            PreTrainedModel, prune_conv1d_layer, SequenceSummary)
 from .modeling_bert import BertLayerNorm as LayerNorm

 logger = logging.getLogger(__name__)

-PRETRAINED_MODEL_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin",
+GPT2_PRETRAINED_MODEL_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin",
                                      "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-pytorch_model.bin"}
-PRETRAINED_CONFIG_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json",
+GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json",
                                       "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-config.json"}

 def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):

@@ -103,7 +102,7 @@ def gelu(x):
 class GPT2Config(PretrainedConfig):
     """Configuration class to store the configuration of a `GPT2Model`.
     """
-    pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
+    pretrained_config_archive_map = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(
         self,

@@ -120,11 +119,13 @@ class GPT2Config(PretrainedConfig):
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
         predict_special_tokens=True,
+        num_labels=1,
         summary_type='token_ids',
         summary_use_proj=True,
-        summary_num_classes=1,
         summary_activation=None,
-        summary_dropout=0.1,
+        summary_proj_to_labels=True,
+        summary_first_dropout=0.1,
         **kwargs
     ):
         """Constructs GPT2Config.

@@ -170,11 +171,13 @@ class GPT2Config(PretrainedConfig):
             self.layer_norm_epsilon = layer_norm_epsilon
             self.initializer_range = initializer_range
             self.predict_special_tokens = predict_special_tokens
+            self.num_labels = num_labels
             self.summary_type = summary_type
             self.summary_use_proj = summary_use_proj
-            self.summary_num_classes = summary_num_classes
             self.summary_activation = summary_activation
-            self.summary_dropout = summary_dropout
+            self.summary_first_dropout = summary_first_dropout
+            self.summary_proj_to_labels = summary_proj_to_labels
         else:
             raise ValueError(
                 "First argument must be either a vocabulary size (int)"

@@ -358,7 +361,7 @@ class GPT2PreTrainedModel(PreTrainedModel):
        a simple interface for dowloading and loading pretrained models.
     """
     config_class = GPT2Config
-    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    pretrained_model_archive_map = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_gpt2
     base_model_prefix = "transformer"
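GPT2Config drops summary_num_classes and summary_dropout in favour of num_labels, summary_proj_to_labels and summary_first_dropout. A hedged sketch constructing the config with the new keywords (the keyword names come from the updated signature above; the vocabulary size is passed explicitly for clarity):

```python
# Sketch only; assumes the package from this commit is importable.
from pytorch_transformers.modeling_gpt2 import GPT2Config

config = GPT2Config(
    vocab_size_or_config_json_file=50257,
    num_labels=1,                 # replaces summary_num_classes
    summary_type='token_ids',
    summary_use_proj=True,
    summary_proj_to_labels=True,  # project to config.num_labels instead of a fixed class count
    summary_first_dropout=0.1,    # replaces summary_dropout; applied before the projection
)
print(config.num_labels, config.summary_proj_to_labels, config.summary_first_dropout)
```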
pytorch_pretrained_bert/modeling_openai.py → pytorch_transformers/modeling_openai.py

@@ -30,15 +30,14 @@ import torch.nn as nn
 from torch.nn import CrossEntropyLoss
 from torch.nn.parameter import Parameter

-from .file_utils import cached_path
-from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
+from .model_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
                            PreTrainedModel, prune_conv1d_layer, SequenceSummary)
 from .modeling_bert import BertLayerNorm as LayerNorm

 logger = logging.getLogger(__name__)

-PRETRAINED_MODEL_ARCHIVE_MAP = {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-pytorch_model.bin"}
+OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP = {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-pytorch_model.bin"}
-PRETRAINED_CONFIG_ARCHIVE_MAP = {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-config.json"}
+OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP = {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-config.json"}

 def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):

@@ -130,7 +129,7 @@ ACT_FNS = {"relu": nn.ReLU, "swish": swish, "gelu": gelu}
 class OpenAIGPTConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `OpenAIGPTModel`.
     """
-    pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
+    pretrained_config_archive_map = OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(
         self,

@@ -148,11 +147,13 @@ class OpenAIGPTConfig(PretrainedConfig):
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
         predict_special_tokens=True,
+        num_labels=1,
         summary_type='token_ids',
         summary_use_proj=True,
-        summary_num_classes=1,
         summary_activation=None,
-        summary_dropout=0.1,
+        summary_proj_to_labels=True,
+        summary_first_dropout=0.1,
         **kwargs
     ):
         """Constructs OpenAIGPTConfig.

@@ -201,11 +202,13 @@ class OpenAIGPTConfig(PretrainedConfig):
             self.layer_norm_epsilon = layer_norm_epsilon
             self.initializer_range = initializer_range
             self.predict_special_tokens = predict_special_tokens
+            self.num_labels = num_labels
             self.summary_type = summary_type
             self.summary_use_proj = summary_use_proj
-            self.summary_num_classes = summary_num_classes
             self.summary_activation = summary_activation
-            self.summary_dropout = summary_dropout
+            self.summary_first_dropout = summary_first_dropout
+            self.summary_proj_to_labels = summary_proj_to_labels
         else:
             raise ValueError(
                 "First argument must be either a vocabulary size (int)"

@@ -384,7 +387,7 @@ class OpenAIGPTPreTrainedModel(PreTrainedModel):
        a simple interface for dowloading and loading pretrained models.
     """
     config_class = OpenAIGPTConfig
-    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    pretrained_model_archive_map = OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_openai_gpt
     base_model_prefix = "transformer"
pytorch_pretrained_bert/modeling_transfo_xl.py → pytorch_transformers/modeling_transfo_xl.py

@@ -36,15 +36,14 @@ from torch.nn.parameter import Parameter
 from .modeling_bert import BertLayerNorm as LayerNorm
 from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
-from .file_utils import cached_path
-from .modeling_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
+from .model_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel

 logger = logging.getLogger(__name__)

-PRETRAINED_MODEL_ARCHIVE_MAP = {
+TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP = {
     'transfo-xl-wt103': "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-pytorch_model.bin",
 }
-PRETRAINED_CONFIG_ARCHIVE_MAP = {
+TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     'transfo-xl-wt103': "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-config.json",
 }

@@ -179,7 +178,7 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
 class TransfoXLConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `TransfoXLModel`.
     """
-    pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
+    pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
                  vocab_size_or_config_json_file=267735,

@@ -838,7 +837,7 @@ class TransfoXLPreTrainedModel(PreTrainedModel):
        a simple interface for dowloading and loading pretrained models.
     """
     config_class = TransfoXLConfig
-    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    pretrained_model_archive_map = TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_transfo_xl
     base_model_prefix = "transformer"
pytorch_pretrained_bert/modeling_transfo_xl_utilities.py → pytorch_transformers/modeling_transfo_xl_utilities.py (File moved)
pytorch_pretrained_bert/model_utils.py → pytorch_transformers/modeling_utils.py

@@ -25,7 +25,7 @@ from io import open
 import torch
 from torch import nn
-from torch.nn import CrossEntropyLoss, MSELoss, functional as F
+from torch.nn import CrossEntropyLoss, functional as F

 from .file_utils import cached_path
@@ -169,6 +169,22 @@ class PreTrainedModel(nn.Module):
         model_to_prune = getattr(self, self.base_model_prefix, self)  # get the base model if needed
         model_to_prune._prune_heads(heads_to_prune)

+    def save_pretrained(self, save_directory):
+        """ Save a model with its configuration file to a directory, so that it
+            can be re-loaded using the `from_pretrained(save_directory)` class method.
+        """
+        assert os.path.isdir(save_directory), "Saving path should be a directory where the model and configuration can be saved"
+
+        # Only save the model it-self if we are using distributed training
+        model_to_save = self.module if hasattr(self, 'module') else self
+
+        # If we save using the predefined names, we can load using `from_pretrained`
+        output_model_file = os.path.join(save_directory, WEIGHTS_NAME)
+        output_config_file = os.path.join(save_directory, CONFIG_NAME)
+
+        torch.save(model_to_save.state_dict(), output_model_file)
+        model_to_save.config.to_json_file(output_config_file)
+
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
         """
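The new save_pretrained writes the weights and configuration under the predefined WEIGHTS_NAME and CONFIG_NAME so that from_pretrained(save_directory) can find them again, as the added docstring states. A minimal round-trip sketch, assuming the package from this commit is importable and the checkpoint can be downloaded:

```python
import os
# Sketch only; downloads 'bert-base-uncased' on first use.
from pytorch_transformers import BertModel

save_directory = "./bert-base-uncased-local"
os.makedirs(save_directory, exist_ok=True)

model = BertModel.from_pretrained("bert-base-uncased")
model.save_pretrained(save_directory)                 # writes the weights and config under WEIGHTS_NAME / CONFIG_NAME
reloaded = BertModel.from_pretrained(save_directory)  # re-load from the predefined file names
```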
@@ -193,7 +209,8 @@ class PreTrainedModel(nn.Module):
         """
         state_dict = kwargs.pop('state_dict', None)
         cache_dir = kwargs.pop('cache_dir', None)
-        from_tf = kwargs.pop('from_tf', None)
+        from_tf = kwargs.pop('from_tf', False)
+        output_loading_info = kwargs.pop('output_loading_info', False)

         # Load config
         config = cls.config_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
@@ -239,6 +256,21 @@ class PreTrainedModel(nn.Module):
             # Directly load from a TensorFlow checkpoint
             return cls.load_tf_weights(model, config, resolved_archive_file[:-6])  # Remove the '.index'

+        # Convert old format to new format if needed from a PyTorch state_dict
+        old_keys = []
+        new_keys = []
+        for key in state_dict.keys():
+            new_key = None
+            if 'gamma' in key:
+                new_key = key.replace('gamma', 'weight')
+            if 'beta' in key:
+                new_key = key.replace('beta', 'bias')
+            if new_key:
+                old_keys.append(key)
+                new_keys.append(new_key)
+        for old_key, new_key in zip(old_keys, new_keys):
+            state_dict[new_key] = state_dict.pop(old_key)
+
         # Load from a PyTorch state_dict
         missing_keys = []
         unexpected_keys = []
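Older TensorFlow-derived checkpoints name LayerNorm parameters gamma/beta while PyTorch uses weight/bias; the block added to from_pretrained rewrites such keys before loading. A self-contained sketch of the same remapping on a toy state dict (keys and values here are placeholders):

```python
import torch

# Toy state dict using the old gamma/beta naming.
state_dict = {
    "encoder.LayerNorm.gamma": torch.ones(4),
    "encoder.LayerNorm.beta": torch.zeros(4),
    "encoder.dense.weight": torch.randn(4, 4),
}

# Same logic as the block added to from_pretrained: collect renames, then apply them.
old_keys, new_keys = [], []
for key in state_dict.keys():
    new_key = None
    if 'gamma' in key:
        new_key = key.replace('gamma', 'weight')
    if 'beta' in key:
        new_key = key.replace('beta', 'bias')
    if new_key:
        old_keys.append(key)
        new_keys.append(new_key)
for old_key, new_key in zip(old_keys, new_keys):
    state_dict[new_key] = state_dict.pop(old_key)

print(sorted(state_dict))  # ['encoder.LayerNorm.bias', 'encoder.LayerNorm.weight', 'encoder.dense.weight']
```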
@@ -279,6 +311,10 @@ class PreTrainedModel(nn.Module):
         if hasattr(model, 'tie_weights'):
             model.tie_weights()  # make sure word embedding weights are still tied

+        if output_loading_info:
+            loading_info = {"missing_keys": missing_keys, "unexpected_keys": unexpected_keys, "error_msgs": error_msgs}
+            return model, loading_info
+
         return model
@@ -478,10 +514,10 @@ class SequenceSummary(nn.Module):
...
@@ -478,10 +514,10 @@ class SequenceSummary(nn.Module):
- 'token_ids' => supply a Tensor of classification token indices (GPT/GPT-2)
- 'token_ids' => supply a Tensor of classification token indices (GPT/GPT-2)
- 'attn' => Not implemented now, use multi-head attention
- 'attn' => Not implemented now, use multi-head attention
summary_use_proj: Add a projection after the vector extraction
summary_use_proj: Add a projection after the vector extraction
summary_
num_classe
s: If
> 0:
the projection outputs to
n
classes (otherwise to hidden_size)
summary_
proj_to_label
s: If
True,
the projection outputs to
config.num_labels
classes (otherwise to hidden_size)
. Default: False.
summary_activation:
summary_activation:
'tanh' => add a tanh activation to the output, Other => no activation. Default
'tanh' => add a t
an
h
activation
to the output
summary_first_dropout: Add a dropout before the projection
an
d
activation
None => no
activation
summary_last_dropout: Add a dropout after the projection and
activation
"""
"""
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
super
(
SequenceSummary
,
self
).
__init__
()
super
(
SequenceSummary
,
self
).
__init__
()
@@ -495,8 +531,8 @@ class SequenceSummary(nn.Module):
         self.summary = nn.Identity()
         if hasattr(config, 'summary_use_proj') and config.summary_use_proj:
-            if hasattr(config, 'summary_num_classes') and config.summary_num_classes > 0:
-                num_classes = config.summary_num_classes
+            if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
+                num_classes = config.num_labels
             else:
                 num_classes = config.hidden_size
             self.summary = nn.Linear(config.hidden_size, num_classes)
@@ -505,7 +541,13 @@ class SequenceSummary(nn.Module):
         if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
             self.activation = nn.Tanh()

-        self.dropout = nn.Dropout(config.summary_dropout)
+        self.first_dropout = nn.Identity()
+        if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
+            self.first_dropout = nn.Dropout(config.summary_first_dropout)
+
+        self.last_dropout = nn.Identity()
+        if hasattr(config, 'summary_last_dropout') and config.summary_last_dropout > 0:
+            self.last_dropout = nn.Dropout(config.summary_last_dropout)

     def forward(self, hidden_states, token_ids=None):
         """ hidden_states: float Tensor in shape [bsz, seq_len, hidden_size], the hidden-states of the last layer.
@@ -531,9 +573,10 @@ class SequenceSummary(nn.Module):
         elif self.summary_type == 'attn':
             raise NotImplementedError

+        output = self.first_dropout(output)
         output = self.summary(output)
         output = self.activation(output)
-        output = self.dropout(output)
+        output = self.last_dropout(output)

         return output
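The forward pass now applies dropout on both sides of the projection: first_dropout before the summary projection and activation, last_dropout after them. A standalone sketch of that ordering with made-up sizes (an illustration, not the SequenceSummary class itself):

```python
import torch
import torch.nn as nn

hidden_size, num_labels = 8, 2
# Stand-ins for the modules SequenceSummary builds from its config:
first_dropout = nn.Dropout(0.1)               # summary_first_dropout
summary = nn.Linear(hidden_size, num_labels)  # projection to config.num_labels (summary_proj_to_labels)
activation = nn.Tanh()                        # summary_activation == 'tanh'
last_dropout = nn.Dropout(0.1)                # summary_last_dropout

output = torch.randn(4, hidden_size)          # e.g. one extracted token vector per sequence
output = first_dropout(output)
output = summary(output)
output = activation(output)
output = last_dropout(output)
print(output.shape)  # torch.Size([4, 2])
```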
@@ -598,9 +641,3 @@ def prune_layer(layer, index, dim=None):
         return prune_conv1d_layer(layer, index, dim=1 if dim is None else dim)
     else:
         raise ValueError("Can't prune layer of class {}".format(layer.__class__))
-
-
-def clean_up_tokenization(out_string):
-    out_string.replace(' .', '.').replace(' ?', '?').replace(' !', '!').replace(' ,', ',').replace(" ' ", "'").replace(" n't", "n't").replace(" 'm", "'m").replace(" do not", " don't").replace(" 's", "'s").replace(" 've", "'ve").replace(" 're", "'re")
-    return out_string
pytorch_pretrained_bert/modeling_xlm.py → pytorch_transformers/modeling_xlm.py

@@ -14,18 +14,14 @@
 # limitations under the License.
 """ PyTorch XLM model.
 """
-from __future__ import (absolute_import, division, print_function,
-                        unicode_literals)
 from __future__ import absolute_import, division, print_function, unicode_literals

 import json
 import logging
 import math
-import os
 import sys
 from io import open
-import math
 import itertools

 import numpy as np

@@ -34,16 +30,15 @@ from torch import nn
 from torch.nn import functional as F
 from torch.nn import CrossEntropyLoss, MSELoss

-from .file_utils import cached_path
-from .modeling_utils import (PretrainedConfig, PreTrainedModel,
+from .model_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
                            prune_linear_layer, SequenceSummary, SQuADHead)

 logger = logging.getLogger(__name__)

-PRETRAINED_MODEL_ARCHIVE_MAP = {
+XLM_PRETRAINED_MODEL_ARCHIVE_MAP = {
     'xlm-mlm-en-2048': "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-pytorch_model.bin",
 }
-PRETRAINED_CONFIG_ARCHIVE_MAP = {
+XLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     'xlm-mlm-en-2048': "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-config.json",
 }

@@ -51,7 +46,7 @@ PRETRAINED_CONFIG_ARCHIVE_MAP = {
 class XLMConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `XLMModel`.
     """
-    pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
+    pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
                  vocab_size_or_config_json_file=30145,

@@ -79,10 +74,11 @@ class XLMConfig(PretrainedConfig):
                 finetuning_task=None,
                 num_labels=2,
-                summary_type='last',
+                summary_type='first',
                 summary_use_proj=True,
-                summary_activation='tanh',
-                summary_dropout=0.1,
+                summary_activation=None,
+                summary_proj_to_labels=True,
+                summary_first_dropout=0.1,
                 start_n_top=5,
                 end_n_top=5,
                 **kwargs):

@@ -164,7 +160,8 @@ class XLMConfig(PretrainedConfig):
             self.summary_type = summary_type
             self.summary_use_proj = summary_use_proj
             self.summary_activation = summary_activation
-            self.summary_dropout = summary_dropout
+            self.summary_proj_to_labels = summary_proj_to_labels
+            self.summary_first_dropout = summary_first_dropout
             self.start_n_top = start_n_top
             self.end_n_top = end_n_top
         else:

@@ -204,7 +201,7 @@ def gelu(x):
     GELU activation
     https://arxiv.org/abs/1606.08415
     https://github.com/huggingface/pytorch-openai-transformer-lm/blob/master/model_pytorch.py#L14
-    https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/modeling.py
+    https://github.com/huggingface/pytorch-transformers/blob/master/modeling.py
     """
     # return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
     return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0)))

@@ -357,7 +354,7 @@ class XLMPreTrainedModel(PreTrainedModel):
        a simple interface for dowloading and loading pretrained models.
     """
     config_class = XLMConfig
-    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    pretrained_model_archive_map = XLM_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = None
     base_model_prefix = "transformer"
pytorch_pretrained_bert/modeling_xlnet.py → pytorch_transformers/modeling_xlnet.py

@@ -31,17 +31,16 @@ from torch import nn
 from torch.nn import functional as F
 from torch.nn import CrossEntropyLoss, MSELoss

-from .file_utils import cached_path
-from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
+from .model_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
                            SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)

 logger = logging.getLogger(__name__)

-PRETRAINED_MODEL_ARCHIVE_MAP = {
+XLNET_PRETRAINED_MODEL_ARCHIVE_MAP = {
     'xlnet-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-pytorch_model.bin",
 }
-PRETRAINED_CONFIG_ARCHIVE_MAP = {
+XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     'xlnet-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-config.json",
 }

@@ -195,7 +194,7 @@ ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish}
 class XLNetConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `XLNetModel`.
     """
-    pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
+    pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
                  vocab_size_or_config_json_file=32000,

@@ -227,7 +226,7 @@ class XLNetConfig(PretrainedConfig):
                 summary_type='last',
                 summary_use_proj=True,
                 summary_activation='tanh',
-                summary_dropout=0.1,
+                summary_last_dropout=0.1,
                 start_n_top=5,
                 end_n_top=5,
                 **kwargs):

@@ -314,7 +313,7 @@ class XLNetConfig(PretrainedConfig):
             self.summary_type = summary_type
             self.summary_use_proj = summary_use_proj
             self.summary_activation = summary_activation
-            self.summary_dropout = summary_dropout
+            self.summary_last_dropout = summary_last_dropout
             self.start_n_top = start_n_top
             self.end_n_top = end_n_top
         else:

@@ -593,7 +592,7 @@ class XLNetPreTrainedModel(PreTrainedModel):
        a simple interface for dowloading and loading pretrained models.
     """
     config_class = XLNetConfig
-    pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
+    pretrained_model_archive_map = XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_xlnet
     base_model_prefix = "transformer"
pytorch_pretrained_bert/optimization.py → pytorch_transformers/optimization.py (File moved)
pytorch_pretrained_bert/optimization_openai.py → pytorch_transformers/optimization_openai.py (File moved)
pytorch_pretrained_bert/tests/__init__.py → pytorch_transformers/tests/__init__.py (File moved)
pytorch_pretrained_bert/tests/conftest.py → pytorch_transformers/tests/conftest.py (File moved)
pytorch_pretrained_bert/tests/fixtures/input.txt → pytorch_transformers/tests/fixtures/input.txt (File moved)
pytorch_pretrained_bert/tests/fixtures/sample_text.txt → pytorch_transformers/tests/fixtures/sample_text.txt (File moved)
pytorch_pretrained_bert/tests/fixtures/test_sentencepiece.model → pytorch_transformers/tests/fixtures/test_sentencepiece.model (File moved)
pytorch_pretrained_bert/tests/modeling_bert_test.py → pytorch_transformers/tests/modeling_bert_test.py

@@ -20,13 +20,13 @@ import unittest
 import shutil
 import pytest

-from pytorch_pretrained_bert import (BertConfig, BertModel, BertForMaskedLM,
+from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
                                     BertForNextSentencePrediction, BertForPreTraining,
                                     BertForQuestionAnswering, BertForSequenceClassification,
                                     BertForTokenClassification, BertForMultipleChoice)
-from pytorch_pretrained_bert.modeling_bert import PRETRAINED_MODEL_ARCHIVE_MAP
+from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .model_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)
+from .modeling_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)

 class BertModelTest(unittest.TestCase):

@@ -266,8 +266,8 @@ class BertModelTest(unittest.TestCase):
     @pytest.mark.slow
     def test_model_from_pretrained(self):
-        cache_dir = "/tmp/pytorch_pretrained_bert_test/"
-        for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+        cache_dir = "/tmp/pytorch_transformers_test/"
+        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
             shutil.rmtree(cache_dir)
             self.assertIsNotNone(model)
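The updated test exercises from_pretrained through the renamed archive map and a renamed cache directory. A standalone equivalent of the test body, for reference (needs network access to download the first listed checkpoint):

```python
# Sketch mirroring the updated test body; not part of the diff itself.
import shutil
from pytorch_transformers import BertModel
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP

cache_dir = "/tmp/pytorch_transformers_test/"
for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
    model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
    shutil.rmtree(cache_dir)
    assert model is not None
```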