chenpangpang / transformers · Commits

Commit e55d4c4e authored Jun 26, 2019 by thomwolf

various updates to conversion, models and examples

parent 603c513b

Showing 6 changed files with 44 additions and 21 deletions (+44 -21)
README.md                                                        +1  -2
examples/run_bert_swag.py                                        +0  -0
examples/run_xlnet_classifier.py                                 +15 -7
pytorch_pretrained_bert/__init__.py                              +1  -0
pytorch_pretrained_bert/convert_xlnet_checkpoint_to_pytorch.py   +21 -8
pytorch_pretrained_bert/modeling_xlnet.py                        +6  -4
README.md · View file @ e55d4c4e

@@ -1394,7 +1394,7 @@ The data for SWAG can be downloaded by cloning the following [repository](https:
 ```shell
 export SWAG_DIR=/path/to/SWAG
-python run_swag.py \
+python run_bert_swag.py \
   --bert_model bert-base-uncased \
   --do_train \
   --do_lower_case \

@@ -1581,7 +1581,6 @@ python run_xlnet_classifier.py \
   --task_name STS-B \
   --do_train \
   --do_eval \
   --do_lower_case \
   --data_dir $GLUE_DIR/STS-B/ \
   --max_seq_length 128 \
   --train_batch_size 8 \
examples/run_swag.py → examples/run_bert_swag.py · View file @ e55d4c4e

File moved
examples/run_xlnet_classifier.py · View file @ e55d4c4e

@@ -70,6 +70,8 @@ def main():
     parser.add_argument("--warmup_proportion", default=0.1, type=float,
                         help="Proportion of training to perform linear learning rate warmup for. "
                              "E.g., 0.1 = 10%% of training.")
+    parser.add_argument("--clip_gradients", default=1.0, type=float,
+                        help="Clip gradient norms.")
     parser.add_argument("--train_batch_size", default=32, type=int,
                         help="Total batch size for training.")
     parser.add_argument('--gradient_accumulation_steps', type=int, default=1,

@@ -80,6 +82,8 @@ def main():
                         help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                              "0 (default value): dynamic loss scaling.\n"
                              "Positive power of 2: static loss scaling value.\n")
+    parser.add_argument("--log_every", default=10, type=int,
+                        help="Log metrics every X training steps.")
     # evaluation
     parser.add_argument("--do_eval", action='store_true',
                         help="Whether to run eval on the dev set.")

@@ -234,12 +238,13 @@ def main():
     # Prepare optimizer
-    param_optimizer = list(model.named_parameters())
-    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
-    optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
-        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
-        ]
+    optimizer_grouped_parameters = model.parameters()
+    # param_optimizer = list(model.named_parameters())
+    # no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
+    # optimizer_grouped_parameters = [
+    #     {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+    #     {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
+    #     ]
     if args.fp16:
         try:
             from apex.optimizers import FP16_Optimizer

@@ -297,6 +302,9 @@ def main():
                 else:
                     loss.backward()
+                if args.clip_gradients > 0.0:
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_gradients)
+
                 tr_loss += loss.item()
                 nb_tr_examples += input_ids.size(0)
                 nb_tr_steps += 1

@@ -310,7 +318,7 @@ def main():
                     optimizer.step()
                     optimizer.zero_grad()
                     global_step += 1
-                    if args.local_rank in [-1, 0]:
+                    if args.local_rank in [-1, 0] and (args.log_every <= 0 or (step + 1) % args.log_every == 0):
                         tb_writer.add_scalar('lr', optimizer.get_lr()[0], global_step)
                         tb_writer.add_scalar('loss', loss.item(), global_step)
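For orientation, here is a minimal standalone sketch of the two behaviours this diff adds to the training loop: gradient-norm clipping controlled by --clip_gradients and step-gated logging controlled by --log_every. The toy Linear model, random data, and SimpleNamespace args are hypothetical placeholders, not the actual XLNet model or the example script's own loop.

    # Sketch only: illustrates the clipping and logging logic added above.
    from types import SimpleNamespace
    import torch

    args = SimpleNamespace(clip_gradients=1.0, log_every=10)
    model = torch.nn.Linear(4, 2)                      # stand-in for the XLNet model
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    for step in range(100):
        inputs, labels = torch.randn(8, 4), torch.randint(0, 2, (8,))
        loss = torch.nn.functional.cross_entropy(model(inputs), labels)
        loss.backward()
        if args.clip_gradients > 0.0:
            # Rescale gradients so their global norm is at most args.clip_gradients.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_gradients)
        optimizer.step()
        optimizer.zero_grad()
        if args.log_every <= 0 or (step + 1) % args.log_every == 0:
            print(f"step {step + 1}: loss {loss.item():.4f}")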
pytorch_pretrained_bert/__init__.py · View file @ e55d4c4e

@@ -20,6 +20,7 @@ from .modeling_gpt2 import (GPT2Config, GPT2Model,
                             load_tf_weights_in_gpt2)
 from .modeling_xlnet import (XLNetBaseConfig, XLNetConfig, XLNetRunConfig,
                              XLNetPreTrainedModel, XLNetModel, XLNetLMHeadModel,
                              XLNetForSequenceClassification,
+                             XLNetForQuestionAnswering,
                              load_tf_weights_in_xlnet)
 from .optimization import BertAdam
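As a quick, hypothetical sanity check (assuming the package is installed at this commit), the newly re-exported class should now resolve from the package root alongside the existing XLNet heads:

    # Hypothetical import check; these names are re-exported by __init__.py above.
    from pytorch_pretrained_bert import XLNetForSequenceClassification, XLNetForQuestionAnswering

    print(XLNetForQuestionAnswering.__name__)  # -> "XLNetForQuestionAnswering"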
pytorch_pretrained_bert/convert_xlnet_checkpoint_to_pytorch.py · View file @ e55d4c4e

@@ -28,20 +28,31 @@ from pytorch_pretrained_bert.modeling_xlnet import (CONFIG_NAME, WEIGHTS_NAME,
                                                     XLNetForSequenceClassification,
                                                     load_tf_weights_in_xlnet)

-GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "sst-2", "sts-b", "qqp", "qnli", "rte", "wnli"]
+GLUE_TASKS = {
+    "cola": "classification",
+    "mnli": "classification",
+    "mrpc": "classification",
+    "sst-2": "classification",
+    "sts-b": "regression",
+    "qqp": "classification",
+    "qnli": "classification",
+    "rte": "classification",
+    "wnli": "classification",
+}

 def convert_xlnet_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_folder_path, finetuning_task=None):
     # Initialise PyTorch model
     config = XLNetConfig.from_json_file(bert_config_file)

-    if finetuning_task is not None and finetuning_task.lower() in GLUE_TASKS:
-        model_class = XLNetLMHeadModel
-    elif finetuning_task is not None and 'squad' in finetuning_task.lower():
-        model_class = XLNetForQuestionAnswering
+    finetuning_task = finetuning_task.lower() if finetuning_task is not None else ""
+    if finetuning_task in GLUE_TASKS:
+        print("Building PyTorch XLNetForSequenceClassification model from configuration: {}".format(str(config)))
+        model = XLNetForSequenceClassification(config, is_regression=bool(GLUE_TASKS[finetuning_task] == "regression"))
+    elif 'squad' in finetuning_task:
+        model = XLNetForQuestionAnswering(config)
     else:
         model_class = XLNetLMHeadModel
-    print("Building PyTorch model {} from configuration: {}".format(str(model_class), str(config)))
-    model = model_class(config)
+        model = XLNetLMHeadModel(config)

     # Load weights from tf checkpoint
     load_tf_weights_in_xlnet(model, config, tf_checkpoint_path, finetuning_task)

@@ -80,6 +91,8 @@ if __name__ == "__main__":
                         type=str,
                         help="Name of a task on which the XLNet TensorFloaw model was fine-tuned")
     args = parser.parse_args()
     print(args)
     convert_xlnet_checkpoint_to_pytorch(args.tf_checkpoint_path,
                                         args.xlnet_config_file,
                                         args.pytorch_dump_folder_path,
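The conversion change above turns GLUE_TASKS from a list into a task-to-problem-type mapping so the converter can build the right head (and flag STS-B as regression). A minimal sketch of that selection rule, using plain strings in place of the real model classes so it runs standalone; pick_head is a hypothetical helper, not a function in the repository:

    # Sketch of the head-selection rule introduced in this commit (assumptions noted above).
    GLUE_TASKS = {
        "cola": "classification", "mnli": "classification", "mrpc": "classification",
        "sst-2": "classification", "sts-b": "regression", "qqp": "classification",
        "qnli": "classification", "rte": "classification", "wnli": "classification",
    }

    def pick_head(finetuning_task=None):
        """Return (head_name, is_regression) the way the converter now chooses a model."""
        task = finetuning_task.lower() if finetuning_task is not None else ""
        if task in GLUE_TASKS:
            return "XLNetForSequenceClassification", GLUE_TASKS[task] == "regression"
        if "squad" in task:
            return "XLNetForQuestionAnswering", False
        return "XLNetLMHeadModel", False

    print(pick_head("STS-B"))   # ('XLNetForSequenceClassification', True)
    print(pick_head("squad"))   # ('XLNetForQuestionAnswering', False)
    print(pick_head())          # ('XLNetLMHeadModel', False)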
pytorch_pretrained_bert/modeling_xlnet.py · View file @ e55d4c4e

@@ -30,7 +30,7 @@ from io import open
 import torch
 from torch import nn
 from torch.nn import functional as F
-from torch.nn import CrossEntropyLoss
+from torch.nn import CrossEntropyLoss, MSELoss

 from .file_utils import cached_path, WEIGHTS_NAME, CONFIG_NAME

@@ -58,11 +58,11 @@ def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None, finetuning_tas
     if hasattr(model, 'lm_loss'):
         # We will load also the output bias
         tf_to_pt_map['model/lm_loss/bias'] = model.lm_loss.bias
-    elif hasattr(model, 'sequence_summary') and 'model/sequnece_summary/summary/kernel' in tf_weights:
+    if hasattr(model, 'sequence_summary') and 'model/sequnece_summary/summary/kernel' in tf_weights:
         # We will load also the sequence summary
         tf_to_pt_map['model/sequnece_summary/summary/kernel'] = model.sequence_summary.summary.weight
         tf_to_pt_map['model/sequnece_summary/summary/bias'] = model.sequence_summary.summary.bias
-    elif hasattr(model, 'logits_proj') and finetuning_task is not None and any('model/regression' in name for name in tf_weights.keys()):
+    if hasattr(model, 'logits_proj') and finetuning_task is not None and 'model/regression_{}/logit/kernel'.format(finetuning_task) in tf_weights:
         tf_to_pt_map['model/regression_{}/logit/kernel'.format(finetuning_task)] = model.logits_proj.weight
         tf_to_pt_map['model/regression_{}/logit/bias'.format(finetuning_task)] = model.logits_proj.bias

@@ -133,6 +133,8 @@ def load_tf_weights_in_xlnet(model, config, tf_path, finetuning_task=None):
         array = tf.train.load_variable(tf_path, name)
         tf_weights[name] = array

+    input("Press Enter to continue...")
+
     # Build TF to PyTorch weights loading map
     tf_to_pt_map = build_tf_xlnet_to_pytorch_map(model, config, tf_weights, finetuning_task)

@@ -144,7 +146,7 @@ def load_tf_weights_in_xlnet(model, config, tf_path, finetuning_task=None):
         array = tf_weights[name]
         # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
         # which are not required for using pretrained model
-        if 'kernel' in name and 'ff' in name:
+        if 'kernel' in name and ('ff' in name or 'summary' in name or 'logit' in name):
             print("Transposing")
             array = np.transpose(array)
         if isinstance(pointer, list):
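The last hunk extends the kernel-transposition rule to the new summary and logit projections. The reason for the transpose is the usual TF-to-PyTorch layout difference: a TF dense kernel is stored as [in_features, out_features] while a PyTorch nn.Linear weight is [out_features, in_features]. A tiny sketch with hypothetical toy shapes, not the actual checkpoint contents:

    import numpy as np

    # Hypothetical ff kernel shape for illustration: TF stores [in, out].
    tf_kernel = np.zeros((768, 3072))
    # What the loader assigns after transposing: matches nn.Linear(768, 3072).weight.
    pt_weight = np.transpose(tf_kernel)
    assert pt_weight.shape == (3072, 768)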