chenpangpang / transformers / Commits

Commit a75c64d8
authored Aug 26, 2020 by Lysandre

Black 20 release

parent e78c1103
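This commit applies the 2020 release of the Black formatter (presumably the 20.8 beta line, which introduced the "magic trailing comma") across the code base. Two changes recur throughout the hunks below: single-line docstrings now close with the quotes on the same line, and calls, signatures, and literals that already ended with a trailing comma are exploded to one element per line. A minimal before/after sketch of both patterns, using a hypothetical build_config helper rather than code from this repository:

# Hypothetical illustration only -- build_config is not part of this commit.

# Before (pre-Black-20 layout):
#
#     def build_config(
#         vocab_size=30522, hidden_size=768, num_layers=12,
#     ):
#         """Constructs an example config.
#         """
#         return {"vocab_size": vocab_size, "hidden_size": hidden_size, "num_layers": num_layers}

# After Black 20.8: the pre-existing trailing comma keeps one argument per line
# (the "magic trailing comma"), and the one-line docstring closes on the same
# line, mirroring the docstring hunks below.
def build_config(
    vocab_size=30522,
    hidden_size=768,
    num_layers=12,
):
    """Constructs an example config."""
    return {"vocab_size": vocab_size, "hidden_size": hidden_size, "num_layers": num_layers}


print(build_config())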
Changes 191

Showing 20 changed files with 719 additions and 618 deletions (+719 -618)
src/transformers/configuration_pegasus.py  +2 -2
src/transformers/configuration_reformer.py  +92 -92
src/transformers/configuration_retribert.py  +38 -38
src/transformers/configuration_roberta.py  +18 -19
src/transformers/configuration_t5.py  +29 -26
src/transformers/configuration_transfo_xl.py  +78 -78
src/transformers/configuration_utils.py  +110 -110
src/transformers/configuration_xlm.py  +115 -116
src/transformers/configuration_xlnet.py  +99 -100
src/transformers/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py  +2 -1
src/transformers/convert_graph_to_onnx.py  +25 -7
src/transformers/convert_marian_to_pytorch.py  +8 -2
src/transformers/convert_pytorch_checkpoint_to_tf2.py  +54 -9
src/transformers/convert_reformer_trax_checkpoint_to_pytorch.py  +20 -9
src/transformers/data/datasets/glue.py  +4 -1
src/transformers/data/datasets/language_modeling.py  +11 -2
src/transformers/data/datasets/squad.py  +4 -1
src/transformers/data/metrics/squad_metrics.py  +3 -3
src/transformers/data/processors/glue.py  +4 -1
src/transformers/data/processors/utils.py  +3 -1
src/transformers/configuration_pegasus.py
src/transformers/configuration_reformer.py
src/transformers/configuration_retribert.py
src/transformers/configuration_roberta.py
@@ -61,6 +61,5 @@ class RobertaConfig(BertConfig):
     model_type = "roberta"
 
     def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs):
-        """Constructs RobertaConfig.
-        """
+        """Constructs RobertaConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
src/transformers/configuration_t5.py
@@ -80,7 +80,10 @@ class T5Config(PretrainedConfig):
         **kwargs
     ):
         super().__init__(
-            pad_token_id=pad_token_id, eos_token_id=eos_token_id, is_encoder_decoder=is_encoder_decoder, **kwargs,
+            pad_token_id=pad_token_id,
+            eos_token_id=eos_token_id,
+            is_encoder_decoder=is_encoder_decoder,
+            **kwargs,
         )
         self.vocab_size = vocab_size
         self.n_positions = n_positions
src/transformers/configuration_transfo_xl.py
src/transformers/configuration_utils.py
@@ -29,7 +29,7 @@ logger = logging.get_logger(__name__)
 class PretrainedConfig(object):
-    r""" Base class for all configuration classes.
+    r"""Base class for all configuration classes.
     Handles a few parameters common to all models' configurations as well as methods for loading/downloading/saving
     configurations.
src/transformers/configuration_xlm.py
@@ -191,8 +191,7 @@ class XLMConfig(PretrainedConfig):
         bos_token_id=0,
         **kwargs
     ):
-        """Constructs XLMConfig.
-        """
+        """Constructs XLMConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
         self.vocab_size = vocab_size
         self.emb_dim = emb_dim
src/transformers/configuration_xlnet.py
@@ -162,8 +162,7 @@ class XLNetConfig(PretrainedConfig):
         eos_token_id=2,
         **kwargs
     ):
-        """Constructs XLNetConfig.
-        """
+        """Constructs XLNetConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
         self.vocab_size = vocab_size
         self.d_model = d_model
src/transformers/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
@@ -27,5 +27,6 @@ if __name__ == "__main__":
         checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl")
         pytorch_dump_folder_path = f"./DialoGPT-{MODEL}"
         convert_dialogpt_checkpoint(
-            checkpoint_path, pytorch_dump_folder_path,
+            checkpoint_path,
+            pytorch_dump_folder_path,
         )
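The hunk above is the smallest instance of the new rule: the trailing comma after pytorch_dump_folder_path is what tells Black to keep one argument per line, and the behaviour can be undone per call site by removing that comma. A small sketch with a stand-in helper (not this script's convert_dialogpt_checkpoint), assuming Black >= 20.8 defaults:

from pathlib import Path


def convert_checkpoint(checkpoint_path: Path, dump_folder: Path) -> None:
    """Stand-in used only to illustrate formatting; it does no real conversion."""
    print(f"would convert {checkpoint_path} into {dump_folder}")


# A trailing comma after the last argument keeps the call exploded, as in the hunk above:
convert_checkpoint(
    Path("checkpoint.pkl"),
    Path("./dump"),
)

# Drop the trailing comma and Black collapses the call onto one line when it fits:
convert_checkpoint(Path("checkpoint.pkl"), Path("./dump"))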
src/transformers/convert_graph_to_onnx.py
@@ -38,24 +38,39 @@ class OnnxConverterArgumentParser(ArgumentParser):
         super().__init__("ONNX Converter")
         self.add_argument(
-            "--pipeline", type=str, choices=SUPPORTED_PIPELINES, default="feature-extraction",
+            "--pipeline",
+            type=str,
+            choices=SUPPORTED_PIPELINES,
+            default="feature-extraction",
         )
         self.add_argument(
-            "--model", type=str, required=True, help="Model's id or path (ex: bert-base-cased)",
+            "--model",
+            type=str,
+            required=True,
+            help="Model's id or path (ex: bert-base-cased)",
         )
         self.add_argument("--tokenizer", type=str, help="Tokenizer's id or path (ex: bert-base-cased)")
         self.add_argument(
-            "--framework", type=str, choices=["pt", "tf"], help="Framework for loading the model",
+            "--framework",
+            type=str,
+            choices=["pt", "tf"],
+            help="Framework for loading the model",
         )
         self.add_argument("--opset", type=int, default=11, help="ONNX opset to use")
         self.add_argument(
-            "--check-loading", action="store_true", help="Check ONNX is able to load the model",
+            "--check-loading",
+            action="store_true",
+            help="Check ONNX is able to load the model",
         )
         self.add_argument(
-            "--use-external-format", action="store_true", help="Allow exporting model >= than 2Gb",
+            "--use-external-format",
+            action="store_true",
+            help="Allow exporting model >= than 2Gb",
         )
         self.add_argument(
-            "--quantize", action="store_true", help="Quantize the neural network to be run with int8",
+            "--quantize",
+            action="store_true",
+            help="Quantize the neural network to be run with int8",
         )
         self.add_argument("output")

@@ -376,7 +391,10 @@ def quantize(onnx_model_path: Path) -> Path:
     )
     quantized_model = quantize(
-        model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True,
+        model=onnx_model,
+        quantization_mode=QuantizationMode.IntegerOps,
+        force_fusions=True,
+        symmetric_weight=True,
     )

     # Append "-quantized" at the end of the model's name
src/transformers/convert_marian_to_pytorch.py
@@ -255,7 +255,11 @@ license: apache-2.0
 def write_model_card(
-    hf_model_name: str, repo_root="OPUS-MT-train", save_dir=Path("marian_converted"), dry_run=False, extra_metadata={},
+    hf_model_name: str,
+    repo_root="OPUS-MT-train",
+    save_dir=Path("marian_converted"),
+    dry_run=False,
+    extra_metadata={},
 ) -> str:
     """Copy the most recent model's readme section from opus, and add metadata.
     upload command: aws s3 sync model_card_dir s3://models.huggingface.co/bert/Helsinki-NLP/ --dryrun

@@ -604,7 +608,9 @@ class OpusState:
         assert "hidden_size" not in cfg.to_dict()
         load_layers_(
-            model.model.encoder.layers, state_dict, BART_CONVERTER,
+            model.model.encoder.layers,
+            state_dict,
+            BART_CONVERTER,
         )
         load_layers_(model.model.decoder.layers, state_dict, BART_CONVERTER, is_decoder=True)
src/transformers/convert_pytorch_checkpoint_to_tf2.py
@@ -108,7 +108,12 @@ if is_torch_available():
     logging.set_verbosity_info()

     MODEL_CLASSES = {
-        "bert": (BertConfig, TFBertForPreTraining, BertForPreTraining, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+        "bert": (
+            BertConfig,
+            TFBertForPreTraining,
+            BertForPreTraining,
+            BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
         "bert-large-uncased-whole-word-masking-finetuned-squad": (
             BertConfig,
             TFBertForQuestionAnswering,

@@ -127,9 +132,24 @@ MODEL_CLASSES = {
             BertForSequenceClassification,
             BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
         ),
-        "gpt2": (GPT2Config, TFGPT2LMHeadModel, GPT2LMHeadModel, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-        "xlnet": (XLNetConfig, TFXLNetLMHeadModel, XLNetLMHeadModel, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-        "xlm": (XLMConfig, TFXLMWithLMHeadModel, XLMWithLMHeadModel, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+        "gpt2": (
+            GPT2Config,
+            TFGPT2LMHeadModel,
+            GPT2LMHeadModel,
+            GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
+        "xlnet": (
+            XLNetConfig,
+            TFXLNetLMHeadModel,
+            XLNetLMHeadModel,
+            XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
+        "xlm": (
+            XLMConfig,
+            TFXLMWithLMHeadModel,
+            XLMWithLMHeadModel,
+            XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
         "xlm-roberta": (
             XLMRobertaConfig,
             TFXLMRobertaForMaskedLM,

@@ -148,7 +168,12 @@ MODEL_CLASSES = {
             OpenAIGPTLMHeadModel,
             OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
         ),
-        "roberta": (RobertaConfig, TFRobertaForMaskedLM, RobertaForMaskedLM, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+        "roberta": (
+            RobertaConfig,
+            TFRobertaForMaskedLM,
+            RobertaForMaskedLM,
+            ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
         "roberta-large-mnli": (
             RobertaConfig,
             TFRobertaForSequenceClassification,

@@ -179,10 +204,30 @@ MODEL_CLASSES = {
             DistilBertForQuestionAnswering,
             DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
         ),
-        "ctrl": (CTRLConfig, TFCTRLLMHeadModel, CTRLLMHeadModel, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-        "albert": (AlbertConfig, TFAlbertForPreTraining, AlbertForPreTraining, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-        "t5": (T5Config, TFT5ForConditionalGeneration, T5ForConditionalGeneration, T5_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-        "electra": (ElectraConfig, TFElectraForPreTraining, ElectraForPreTraining, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+        "ctrl": (
+            CTRLConfig,
+            TFCTRLLMHeadModel,
+            CTRLLMHeadModel,
+            CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
+        "albert": (
+            AlbertConfig,
+            TFAlbertForPreTraining,
+            AlbertForPreTraining,
+            ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
+        "t5": (
+            T5Config,
+            TFT5ForConditionalGeneration,
+            T5ForConditionalGeneration,
+            T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
+        "electra": (
+            ElectraConfig,
+            TFElectraForPreTraining,
+            ElectraForPreTraining,
+            ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ),
     }
src/transformers/convert_reformer_trax_checkpoint_to_pytorch.py
@@ -49,10 +49,12 @@ def set_layer_weights_in_torch_lsh(weights, torch_layer, hidden_size):
         torch.tensor(np_query_key).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.value, torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.value,
+        torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.output.dense, torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
+        torch_layer.output.dense,
+        torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
     )

@@ -64,16 +66,20 @@ def set_layer_weights_in_torch_local(weights, torch_layer, hidden_size):
     np_dense = np.asarray(weights[3])
     set_param(
-        torch_layer.self_attention.query, torch.tensor(np_query).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.query,
+        torch.tensor(np_query).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.key, torch.tensor(np_key).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.key,
+        torch.tensor(np_key).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.value, torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.value,
+        torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.output.dense, torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
+        torch_layer.output.dense,
+        torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
     )

@@ -83,7 +89,9 @@ def set_block_weights_in_torch(weights, torch_block, hidden_size):
     layer_norm_1_weight = np.asarray(layer_norm_1[0])
     layer_norm_1_bias = np.asarray(layer_norm_1[1])
     set_param(
-        torch_block.attention.layer_norm, torch.tensor(layer_norm_1_weight), torch.tensor(layer_norm_1_bias),
+        torch_block.attention.layer_norm,
+        torch.tensor(layer_norm_1_weight),
+        torch.tensor(layer_norm_1_bias),
     )

     # lsh weights + output

@@ -104,7 +112,9 @@ def set_block_weights_in_torch(weights, torch_block, hidden_size):
     layer_norm_2_weight = np.asarray(intermediate_weights[0][0])
     layer_norm_2_bias = np.asarray(intermediate_weights[0][1])
     set_param(
-        torch_block.feed_forward.layer_norm, torch.tensor(layer_norm_2_weight), torch.tensor(layer_norm_2_bias),
+        torch_block.feed_forward.layer_norm,
+        torch.tensor(layer_norm_2_weight),
+        torch.tensor(layer_norm_2_bias),
     )

     # intermediate dense

@@ -133,7 +143,8 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
     # word embeds
     word_embeddings = np.asarray(weights[1])
     set_param(
-        torch_model_reformer.embeddings.word_embeddings, torch.tensor(word_embeddings),
+        torch_model_reformer.embeddings.word_embeddings,
+        torch.tensor(word_embeddings),
     )

     if isinstance(weights[3], tuple):
src/transformers/data/datasets/glue.py
@@ -86,7 +86,10 @@ class GlueDataset(Dataset):
         cached_features_file = os.path.join(
             cache_dir if cache_dir is not None else args.data_dir,
             "cached_{}_{}_{}_{}".format(
-                mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), args.task_name,
+                mode.value,
+                tokenizer.__class__.__name__,
+                str(args.max_seq_length),
+                args.task_name,
             ),
         )
         label_list = self.processor.get_labels()
src/transformers/data/datasets/language_modeling.py
@@ -21,7 +21,11 @@ class TextDataset(Dataset):
     """

    def __init__(
-        self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
+        self,
+        tokenizer: PreTrainedTokenizer,
+        file_path: str,
+        block_size: int,
+        overwrite_cache=False,
    ):
        assert os.path.isfile(file_path), f"Input file path {file_path} not found"

@@ -29,7 +33,12 @@ class TextDataset(Dataset):
         directory, filename = os.path.split(file_path)
         cached_features_file = os.path.join(
-            directory, "cached_lm_{}_{}_{}".format(tokenizer.__class__.__name__, str(block_size), filename,),
+            directory,
+            "cached_lm_{}_{}_{}".format(
+                tokenizer.__class__.__name__,
+                str(block_size),
+                filename,
+            ),
         )

         # Make sure only the first process in distributed training processes the dataset,
src/transformers/data/datasets/squad.py
@@ -119,7 +119,10 @@ class SquadDataset(Dataset):
         cached_features_file = os.path.join(
             cache_dir if cache_dir is not None else args.data_dir,
             "cached_{}_{}_{}_{}".format(
-                mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), version_tag,
+                mode.value,
+                tokenizer.__class__.__name__,
+                str(args.max_seq_length),
+                version_tag,
             ),
         )
src/transformers/data/metrics/squad_metrics.py
@@ -589,7 +589,7 @@ def compute_predictions_log_probs(
     tokenizer,
     verbose_logging,
 ):
-    """ XLNet write prediction logic (more complex than Bert's).
+    """XLNet write prediction logic (more complex than Bert's).
     Write final predictions to the json file and log-odds of null if needed.
     Requires utils_squad_evaluate.py
src/transformers/data/processors/glue.py
@@ -69,7 +69,10 @@ def glue_convert_examples_to_features(
 if is_tf_available():

     def _tf_glue_convert_examples_to_features(
-        examples: tf.data.Dataset, tokenizer: PreTrainedTokenizer, task=str, max_length: Optional[int] = None,
+        examples: tf.data.Dataset,
+        tokenizer: PreTrainedTokenizer,
+        task=str,
+        max_length: Optional[int] = None,
     ) -> tf.data.Dataset:
         """
         Returns:
src/transformers/data/processors/utils.py
@@ -269,7 +269,9 @@ class SingleSentenceClassificationProcessor(DataProcessor):
                 logger.info("Tokenizing example %d", ex_index)

             input_ids = tokenizer.encode(
-                example.text_a, add_special_tokens=True, max_length=min(max_length, tokenizer.max_len),
+                example.text_a,
+                add_special_tokens=True,
+                max_length=min(max_length, tokenizer.max_len),
             )
             all_input_ids.append(input_ids)