Project: chenpangpang/transformers

Commit 077a5b0d, authored May 18, 2019 by Chris
Parents: 2bcda8d0, 3fc63f12

    Merge remote-tracking branch 'upstream/master' into convert-back-to-tf

    merging
Showing 9 changed files with 166 additions and 154 deletions (+166 / -154):

    examples/lm_finetuning/finetune_on_pregenerated.py    +1   -2
    examples/lm_finetuning/simple_lm_finetuning.py        +30  -30
    examples/run_classifier.py                            +31  -31
    examples/run_openai_gpt.py                            +14  -13
    examples/run_squad.py                                 +35  -35
    examples/run_swag.py                                  +34  -34
    hubconf.py                                            +1   -1
    pytorch_pretrained_bert/file_utils.py                 +12  -3
    pytorch_pretrained_bert/modeling.py                   +8   -5
examples/lm_finetuning/finetune_on_pregenerated.py

...
@@ -315,8 +315,7 @@ def main():
             if args.fp16:
                 # modify learning rate with special warm up BERT uses
                 # if args.fp16 is False, BertAdam is used that handles this automatically
-                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                         args.warmup_proportion)
+                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
             for param_group in optimizer.param_groups:
                 param_group['lr'] = lr_this_step
             optimizer.step()
...
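
The only change here (mirrored in the other example scripts below) is the warmup_linear.get_lr call: it now receives the raw global_step instead of the pre-divided fraction global_step/num_train_optimization_steps. The schedule's exact signature is not visible in this hunk, so the following is only a minimal sketch of why the call site can drop the division, assuming the schedule object already knows the total number of optimization steps; it is not the library's WarmupLinearSchedule implementation.

    # Minimal sketch, not the library class: a schedule constructed with the total
    # step count can normalize the raw step itself, so callers no longer divide.
    class LinearWarmupSketch:
        def __init__(self, warmup, t_total):
            self.warmup = warmup      # e.g. args.warmup_proportion
            self.t_total = t_total    # e.g. num_train_optimization_steps

        def get_lr(self, step):
            progress = step / self.t_total            # the division the call site used to do
            if progress < self.warmup:
                return progress / self.warmup         # linear ramp-up
            return max(0.0, (1.0 - progress) / (1.0 - self.warmup))  # linear decay

    # Old pattern: lr * schedule_fn(global_step / num_train_optimization_steps, warmup)
    # New pattern: lr * sched.get_lr(global_step)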
examples/lm_finetuning/simple_lm_finetuning.py

...
@@ -534,6 +534,7 @@ def main():
         model = torch.nn.DataParallel(model)

     # Prepare optimizer
+    if args.do_train:
         param_optimizer = list(model.named_parameters())
         no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
         optimizer_grouped_parameters = [
...
@@ -603,8 +604,7 @@ def main():
             if args.fp16:
                 # modify learning rate with special warm up BERT uses
                 # if args.fp16 is False, BertAdam is used that handles this automatically
-                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                         args.warmup_proportion)
+                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
             for param_group in optimizer.param_groups:
                 param_group['lr'] = lr_this_step
             optimizer.step()
...
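
The first hunk here, and the matching hunks in run_classifier.py, run_openai_gpt.py, run_squad.py, and run_swag.py below, wrap optimizer preparation in `if args.do_train:` so that evaluation-only runs never build an optimizer. A rough sketch of the resulting control flow follows; `args`, `model`, and `num_train_optimization_steps` are the script's existing objects, and the BertAdam arguments are illustrative (taken from how these example scripts typically construct it, not from the visible hunk).

    # Sketch of the guarded optimizer setup (BertAdam kwargs are illustrative).
    from pytorch_pretrained_bert.optimization import BertAdam

    optimizer = None
    if args.do_train:
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_optimization_steps)
    # With --do_train not set, the script goes straight to evaluation and the
    # optimizer is never touched.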
examples/run_classifier.py

...
@@ -271,7 +271,7 @@ class StsbProcessor(DataProcessor):
 class QqpProcessor(DataProcessor):
-    """Processor for the STS-B data set (GLUE version)."""
+    """Processor for the QQP data set (GLUE version)."""

     def get_train_examples(self, data_dir):
         """See base class."""
...
@@ -306,7 +306,7 @@ class QqpProcessor(DataProcessor):
 class QnliProcessor(DataProcessor):
-    """Processor for the STS-B data set (GLUE version)."""
+    """Processor for the QNLI data set (GLUE version)."""

     def get_train_examples(self, data_dir):
         """See base class."""
...
@@ -763,6 +763,7 @@ def main():
         model = torch.nn.DataParallel(model)

     # Prepare optimizer
+    if args.do_train:
         param_optimizer = list(model.named_parameters())
         no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
         optimizer_grouped_parameters = [
...
@@ -854,8 +855,7 @@ def main():
             if args.fp16:
                 # modify learning rate with special warm up BERT uses
                 # if args.fp16 is False, BertAdam is used that handles this automatically
-                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                         args.warmup_proportion)
+                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
             for param_group in optimizer.param_groups:
                 param_group['lr'] = lr_this_step
             optimizer.step()
...
examples/run_openai_gpt.py

...
@@ -183,6 +183,7 @@ def main():
     eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

     # Prepare optimizer
+    if args.do_train:
         param_optimizer = list(model.named_parameters())
         no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
         optimizer_grouped_parameters = [
...
examples/run_squad.py

...
@@ -922,6 +922,7 @@ def main():
         model = torch.nn.DataParallel(model)

     # Prepare optimizer
+    if args.do_train:
         param_optimizer = list(model.named_parameters())

         # hack to remove pooler, which is not used
...
@@ -1015,8 +1016,7 @@ def main():
             if args.fp16:
                 # modify learning rate with special warm up BERT uses
                 # if args.fp16 is False, BertAdam is used and handles this automatically
-                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                         args.warmup_proportion)
+                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
             for param_group in optimizer.param_groups:
                 param_group['lr'] = lr_this_step
             optimizer.step()
...
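
run_squad.py (and run_swag.py below) also carry the "hack to remove pooler, which is not used" comment inside the newly guarded block; the filtering line itself is collapsed out of the visible hunk. A hedged reconstruction of the usual pattern for dropping the unused pooler weights before building the parameter groups (the exact expression in the scripts may differ):

    # Hypothetical reconstruction of the pooler-removal step named by the comment:
    # the pooler head is not trained for SQuAD/SWAG, so its parameters are dropped
    # from the list before the optimizer parameter groups are built.
    param_optimizer = list(model.named_parameters())
    param_optimizer = [(name, p) for name, p in param_optimizer if 'pooler' not in name]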
examples/run_swag.py

...
@@ -385,6 +385,7 @@ def main():
         model = torch.nn.DataParallel(model)

     # Prepare optimizer
+    if args.do_train:
         param_optimizer = list(model.named_parameters())

         # hack to remove pooler, which is not used
...
@@ -466,8 +467,7 @@ def main():
             if args.fp16:
                 # modify learning rate with special warm up BERT uses
                 # if args.fp16 is False, BertAdam is used that handles this automatically
-                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                         args.warmup_proportion)
+                lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
             for param_group in optimizer.param_groups:
                 param_group['lr'] = lr_this_step
             optimizer.step()
...
hubconf.py

...
@@ -84,7 +84,7 @@ def bertTokenizer(*args, **kwargs):
     Example:
         >>> sentence = 'Hello, World!'
-        >>> tokenizer = torch.hub.load('ailzhang/pytorch-pretrained-BERT:hubconf', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False, force_reload=False)
+        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT:hubconf', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False, force_reload=False)
         >>> toks = tokenizer.tokenize(sentence)
         ['Hello', '##,', 'World', '##!']
         >>> ids = tokenizer.convert_tokens_to_ids(toks)
...
pytorch_pretrained_bert/file_utils.py

...
@@ -22,6 +22,15 @@ import requests
 from botocore.exceptions import ClientError
 from tqdm import tqdm

+try:
+    from torch.hub import _get_torch_home
+    torch_cache_home = _get_torch_home()
+except ImportError:
+    torch_cache_home = os.path.expanduser(
+        os.getenv('TORCH_HOME', os.path.join(
+            os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch')))
+default_cache_path = os.path.join(torch_cache_home, 'pytorch_pretrained_bert')
+
 try:
     from urllib.parse import urlparse
 except ImportError:
...
@@ -29,11 +38,11 @@ except ImportError:
 try:
     from pathlib import Path
-    PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
-                                                   Path.home() / '.pytorch_pretrained_bert'))
+    PYTORCH_PRETRAINED_BERT_CACHE = Path(
+        os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path))
 except (AttributeError, ImportError):
-    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
-                                              os.path.join(os.path.expanduser("~"), '.pytorch_pretrained_bert'))
+    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
+                                              default_cache_path)

 CONFIG_NAME = "config.json"
 WEIGHTS_NAME = "pytorch_model.bin"
...
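
The net effect of these two hunks is that the download cache now defaults to a subdirectory of the PyTorch cache directory instead of ~/.pytorch_pretrained_bert, while the PYTORCH_PRETRAINED_BERT_CACHE environment variable still overrides everything. A small sketch of the resolution order implied by the diff; resolve_cache_dir is a hypothetical helper that just mirrors the module-level logic above:

    import os

    def resolve_cache_dir():
        # Resolution order after this change (first match wins):
        # 1. PYTORCH_PRETRAINED_BERT_CACHE (explicit override, unchanged behaviour)
        # 2. torch.hub._get_torch_home()/pytorch_pretrained_bert, when available
        # 3. $TORCH_HOME/pytorch_pretrained_bert (only if _get_torch_home is missing)
        # 4. $XDG_CACHE_HOME/torch/pytorch_pretrained_bert, else ~/.cache/torch/pytorch_pretrained_bert
        try:
            from torch.hub import _get_torch_home
            torch_cache_home = _get_torch_home()
        except ImportError:
            torch_cache_home = os.path.expanduser(
                os.getenv('TORCH_HOME', os.path.join(
                    os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch')))
        default_cache_path = os.path.join(torch_cache_home, 'pytorch_pretrained_bert')
        return os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path)

    # With no environment overrides this typically resolves to
    # ~/.cache/torch/pytorch_pretrained_bert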
pytorch_pretrained_bert/modeling.py

...
@@ -145,7 +145,8 @@ class BertConfig(object):
                  attention_probs_dropout_prob=0.1,
                  max_position_embeddings=512,
                  type_vocab_size=2,
-                 initializer_range=0.02):
+                 initializer_range=0.02,
+                 layer_norm_eps=1e-12):
         """Constructs BertConfig.

         Args:
...
@@ -169,6 +170,7 @@ class BertConfig(object):
                 `BertModel`.
             initializer_range: The sttdev of the truncated_normal_initializer for
                 initializing all weight matrices.
+            layer_norm_eps: The epsilon used by LayerNorm.
         """
         if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
                         and isinstance(vocab_size_or_config_json_file, unicode)):
...
@@ -188,6 +190,7 @@ class BertConfig(object):
             self.max_position_embeddings = max_position_embeddings
             self.type_vocab_size = type_vocab_size
             self.initializer_range = initializer_range
+            self.layer_norm_eps = layer_norm_eps
         else:
             raise ValueError("First argument must be either a vocabulary size (int)"
                              "or the path to a pretrained model config file (str)")
...
@@ -254,7 +257,7 @@ class BertEmbeddings(nn.Module):
         # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
         # any TensorFlow checkpoint file
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

     def forward(self, input_ids, token_type_ids=None):
...
@@ -329,7 +332,7 @@ class BertSelfOutput(nn.Module):
     def __init__(self, config):
         super(BertSelfOutput, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

     def forward(self, hidden_states, input_tensor):
...
@@ -370,7 +373,7 @@ class BertOutput(nn.Module):
     def __init__(self, config):
         super(BertOutput, self).__init__()
         self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

     def forward(self, hidden_states, input_tensor):
...
@@ -434,7 +437,7 @@ class BertPredictionHeadTransform(nn.Module):
             self.transform_act_fn = ACT2FN[config.hidden_act]
         else:
             self.transform_act_fn = config.hidden_act
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)

     def forward(self, hidden_states):
         hidden_states = self.dense(hidden_states)
...
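
Taken together, these hunks make the LayerNorm epsilon configurable through BertConfig instead of being hard-coded to 1e-12 in each module. A short usage sketch; every configuration value other than layer_norm_eps is an arbitrary placeholder, and the import path assumes the package layout used elsewhere in this commit:

    from pytorch_pretrained_bert.modeling import BertConfig, BertModel

    # Arbitrary example values; layer_norm_eps is the new keyword added by this commit
    # (previously every BertLayerNorm was built with eps=1e-12).
    config = BertConfig(
        vocab_size_or_config_json_file=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        layer_norm_eps=1e-5,
    )
    model = BertModel(config)
    # BertEmbeddings, BertSelfOutput, BertOutput, and BertPredictionHeadTransform now
    # all construct their LayerNorm with eps=config.layer_norm_eps.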