chenpangpang / transformers · Commits

Commit fa5222c2 · authored Jan 10, 2019 by thomwolf
update readme
Parent: ab90d4cd

Showing 2 changed files, with 206 additions and 322 deletions (+206 −322):

  README.md                   +206 −18   (diff collapsed; contents not shown in this capture)
  examples/run_openai_gpt.py  +0 −304    (deleted, mode 100644 → 0; shown below as of parent ab90d4cd)
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run OpenAI GPT on RocStories."""
import argparse
import os
import random
import logging

from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

# NOTE: the imports below were commented out in the file as committed, yet the
# helpers they provide (rocstories, DoubleHeadModel, load_openai_pretrained_model,
# TextEncoder, encode_dataset, iter_data, ResultLogger, make_path,
# MultipleChoiceLossCompute, rocstories_analysis) are still referenced further
# down, so the script cannot run without them -- presumably why it was removed
# in this commit.
# from analysis import rocstories as rocstories_analysis
# from datasets import rocstories
# from model_pytorch import DoubleHeadModel, load_openai_pretrained_model
# from opt import OpenAIAdam
# from text_utils import TextEncoder
# from utils import (encode_dataset, iter_data,
#                    ResultLogger, make_path)
# from loss import MultipleChoiceLossCompute

import numpy as np
import torch
import torch.nn as nn  # missing in the original; needed for CrossEntropyLoss and DataParallel below
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer
from pytorch_pretrained_bert.modeling_openai import OpenAIGPTDoubleHeadsModel
from pytorch_pretrained_bert.optimization_openai import OpenAIAdam
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.INFO)
logger = logging.getLogger(__name__)
def transform_roc(X1, X2, X3):
    n_batch = len(X1)
    xmb = np.zeros((n_batch, 2, n_ctx, 2), dtype=np.int32)
    mmb = np.zeros((n_batch, 2, n_ctx), dtype=np.float32)
    start = encoder['_start_']
    delimiter = encoder['_delimiter_']
    for i, (x1, x2, x3) in enumerate(zip(X1, X2, X3)):
        x12 = [start] + x1[:max_len] + [delimiter] + x2[:max_len] + [clf_token]
        x13 = [start] + x1[:max_len] + [delimiter] + x3[:max_len] + [clf_token]
        l12 = len(x12)
        l13 = len(x13)
        xmb[i, 0, :l12, 0] = x12
        xmb[i, 1, :l13, 0] = x13
        mmb[i, 0, :l12] = 1
        mmb[i, 1, :l13] = 1
    # Position information that is added to the input embeddings in the TransformerModel
    xmb[:, :, :, 1] = np.arange(n_vocab + n_special, n_vocab + n_special + n_ctx)
    return xmb, mmb
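
def _transform_roc_example():
    # Added illustration (not part of the original file; toy numbers are made up):
    # transform_roc packs each story twice -- [_start_] context [_delimiter_]
    # ending [_classify_] -- one row per candidate ending, alongside a
    # "real token" mask; the second input channel holds position ids.
    enc = {'_start_': 100, '_delimiter_': 101, '_classify_': 102}
    toy_n_ctx = 10
    toy_max_len = toy_n_ctx // 2 - 2
    x1, x2 = [5, 6, 7], [8, 9]  # context token ids, ending token ids
    x12 = ([enc['_start_']] + x1[:toy_max_len] + [enc['_delimiter_']]
           + x2[:toy_max_len] + [enc['_classify_']])
    row = np.zeros(toy_n_ctx, dtype=np.int32)
    row[:len(x12)] = x12  # -> [100   5   6   7 101   8   9 102   0   0]
    mask = np.zeros(toy_n_ctx, dtype=np.float32)
    mask[:len(x12)] = 1   # -> [1. 1. 1. 1. 1. 1. 1. 1. 0. 0.]
    return row, mask
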
def iter_apply(Xs, Ms, Ys):
    # fns = [lambda x: np.concatenate(x, 0), lambda x: float(np.sum(x))]
    logits = []
    cost = 0
    with torch.no_grad():
        dh_model.eval()
        for xmb, mmb, ymb in iter_data(Xs, Ms, Ys, n_batch=n_batch_train,
                                       truncate=False, verbose=True):
            n = len(xmb)
            XMB = torch.tensor(xmb, dtype=torch.long).to(device)
            YMB = torch.tensor(ymb, dtype=torch.long).to(device)
            MMB = torch.tensor(mmb).to(device)
            _, clf_logits = dh_model(XMB)
            clf_logits *= n
            clf_losses = compute_loss_fct(XMB, YMB, MMB, clf_logits,
                                          only_return_losses=True)
            clf_losses *= n
            logits.append(clf_logits.to("cpu").numpy())
            cost += clf_losses.sum().item()
        logits = np.concatenate(logits, 0)
    return logits, cost
def iter_predict(Xs, Ms):
    logits = []
    with torch.no_grad():
        dh_model.eval()
        for xmb, mmb in iter_data(Xs, Ms, n_batch=n_batch_train,
                                  truncate=False, verbose=True):
            n = len(xmb)
            XMB = torch.tensor(xmb, dtype=torch.long).to(device)
            MMB = torch.tensor(mmb).to(device)
            _, clf_logits = dh_model(XMB)
            logits.append(clf_logits.to("cpu").numpy())
        logits = np.concatenate(logits, 0)
    return logits
def log(save_dir, desc):
    global best_score
    print("Logging")
    tr_logits, tr_cost = iter_apply(trX[:n_valid], trM[:n_valid], trY[:n_valid])
    va_logits, va_cost = iter_apply(vaX, vaM, vaY)
    tr_cost = tr_cost / len(trY[:n_valid])
    va_cost = va_cost / n_valid
    tr_acc = accuracy_score(trY[:n_valid], np.argmax(tr_logits, 1)) * 100.
    va_acc = accuracy_score(vaY, np.argmax(va_logits, 1)) * 100.
    logger.log(n_epochs=n_epochs, n_updates=n_updates,
               tr_cost=tr_cost, va_cost=va_cost, tr_acc=tr_acc, va_acc=va_acc)
    print('%d %d %.3f %.3f %.2f %.2f' % (n_epochs, n_updates, tr_cost, va_cost, tr_acc, va_acc))
    if submit:
        score = va_acc
        if score > best_score:
            best_score = score
            path = os.path.join(save_dir, desc, 'best_params')
            torch.save(dh_model.state_dict(), make_path(path))
def predict(dataset, submission_dir):
    filename = filenames[dataset]
    pred_fn = pred_fns[dataset]
    label_decoder = label_decoders[dataset]
    predictions = pred_fn(iter_predict(teX, teM))
    if label_decoder is not None:
        predictions = [label_decoder[prediction] for prediction in predictions]
    path = os.path.join(submission_dir, filename)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        f.write('{}\t{}\n'.format('index', 'prediction'))
        for i, prediction in enumerate(predictions):
            f.write('{}\t{}\n'.format(i, prediction))
def run_epoch():
    global n_updates
    for xmb, mmb, ymb in iter_data(*shuffle(trX, trM, trYt, random_state=np.random),
                                   n_batch=n_batch_train, truncate=True, verbose=True):
        dh_model.train()
        XMB = torch.tensor(xmb, dtype=torch.long).to(device)
        YMB = torch.tensor(ymb, dtype=torch.long).to(device)
        MMB = torch.tensor(mmb).to(device)
        lm_logits, clf_logits = dh_model(XMB)
        compute_loss_fct(XMB, YMB, MMB, clf_logits, lm_logits)
        n_updates += 1
        if n_updates in [1000, 2000, 4000, 8000, 16000, 32000] and n_epochs == 0:
            log(save_dir, desc)
argmax = lambda x: np.argmax(x, 1)

pred_fns = {
    'rocstories': argmax,
}

filenames = {
    'rocstories': 'ROCStories.tsv',
}

label_decoders = {
    'rocstories': None,
}
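
def _pred_fn_example():
    # Added illustration (not part of the original file): for 'rocstories' the
    # prediction function is a plain argmax over the per-ending classifier logits.
    toy_logits = np.array([[0.2, 1.3],    # story 0: second ending scores higher
                           [2.0, -1.0]])  # story 1: first ending scores higher
    return pred_fns['rocstories'](toy_logits)  # -> array([1, 0])
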
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--desc', type=str, help="Description")
    parser.add_argument('--dataset', type=str)
    parser.add_argument('--log_dir', type=str, default='log/')
    parser.add_argument('--save_dir', type=str, default='save/')
    parser.add_argument('--data_dir', type=str, default='data/')
    parser.add_argument('--submission_dir', type=str, default='submission/')
    parser.add_argument('--submit', action='store_true')
    parser.add_argument('--analysis', action='store_true')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--n_iter', type=int, default=3)
    parser.add_argument('--n_batch', type=int, default=8)
    parser.add_argument('--max_grad_norm', type=int, default=1)
    parser.add_argument('--lr', type=float, default=6.25e-5)
    parser.add_argument('--lr_warmup', type=float, default=0.002)
    parser.add_argument('--n_ctx', type=int, default=512)
    parser.add_argument('--n_embd', type=int, default=768)
    parser.add_argument('--n_head', type=int, default=12)
    parser.add_argument('--n_layer', type=int, default=12)
    parser.add_argument('--embd_pdrop', type=float, default=0.1)
    parser.add_argument('--attn_pdrop', type=float, default=0.1)
    parser.add_argument('--resid_pdrop', type=float, default=0.1)
    parser.add_argument('--clf_pdrop', type=float, default=0.1)
    parser.add_argument('--l2', type=float, default=0.01)
    parser.add_argument('--vector_l2', action='store_true')
    parser.add_argument('--opt', type=str, default='adam')
    parser.add_argument('--afn', type=str, default='gelu')
    parser.add_argument('--lr_schedule', type=str, default='warmup_linear')
    parser.add_argument('--encoder_path', type=str, default='model/encoder_bpe_40000.json')
    parser.add_argument('--bpe_path', type=str, default='model/vocab_40000.bpe')
    parser.add_argument('--n_transfer', type=int, default=12)
    parser.add_argument('--lm_coef', type=float, default=0.5)
    parser.add_argument('--b1', type=float, default=0.9)
    parser.add_argument('--b2', type=float, default=0.999)
    parser.add_argument('--e', type=float, default=1e-8)
    parser.add_argument('--n_valid', type=int, default=374)
    args = parser.parse_args()
    print(args)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Constants
    submit = args.submit
    dataset = args.dataset
    n_ctx = args.n_ctx
    save_dir = args.save_dir
    desc = args.desc
    data_dir = args.data_dir
    log_dir = args.log_dir
    submission_dir = args.submission_dir

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print("device", device, "n_gpu", n_gpu)

    logger = ResultLogger(path=os.path.join(log_dir, '{}.jsonl'.format(desc)), **args.__dict__)
    text_encoder = TextEncoder(args.encoder_path, args.bpe_path)
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)

    print("Encoding dataset...")
    ((trX1, trX2, trX3, trY),
     (vaX1, vaX2, vaX3, vaY),
     (teX1, teX2, teX3)) = encode_dataset(*rocstories(data_dir, n_valid=args.n_valid),
                                          encoder=text_encoder)
    encoder['_start_'] = len(encoder)
    encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']
    n_special = 3
    max_len = n_ctx // 2 - 2
    # Shrink n_ctx to the longest packed example across the three splits
    # (context + longer ending + 3 special tokens), capped at the configured --n_ctx.
    n_ctx = min(max([len(x1[:max_len]) + max(len(x2[:max_len]), len(x3[:max_len]))
                     for x1, x2, x3 in zip(trX1, trX2, trX3)]
                    + [len(x1[:max_len]) + max(len(x2[:max_len]), len(x3[:max_len]))
                       for x1, x2, x3 in zip(vaX1, vaX2, vaX3)]
                    + [len(x1[:max_len]) + max(len(x2[:max_len]), len(x3[:max_len]))
                       for x1, x2, x3 in zip(teX1, teX2, teX3)]) + 3,
                n_ctx)
    vocab = n_vocab + n_special + n_ctx
    trX, trM = transform_roc(trX1, trX2, trX3)
    vaX, vaM = transform_roc(vaX1, vaX2, vaX3)
    if submit:
        teX, teM = transform_roc(teX1, teX2, teX3)

    n_train = len(trY)
    n_valid = len(vaY)
    n_batch_train = args.n_batch * max(n_gpu, 1)
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    dh_model = DoubleHeadModel(args, clf_token, 'multiple_choice', vocab, n_ctx)

    criterion = nn.CrossEntropyLoss(reduce=False)
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    # MultipleChoiceLossCompute comes from the commented-out `loss` helper of the
    # original openai finetune-transformer-lm port: there it mixes the LM and
    # classification losses (LM loss weighted by --lm_coef) and, unless called
    # with only_return_losses=True, also runs the backward pass and optimizer step.
    compute_loss_fct = MultipleChoiceLossCompute(criterion,
                                                 criterion,
                                                 args.lm_coef,
                                                 model_opt)
    load_openai_pretrained_model(dh_model.transformer, n_ctx=n_ctx, n_special=n_special)

    dh_model.to(device)
    dh_model = nn.DataParallel(dh_model)

    n_updates = 0
    n_epochs = 0
    if dataset != 'stsb':
        trYt = trY
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        torch.save(dh_model.state_dict(), make_path(path))
    best_score = 0
    for i in range(args.n_iter):
        print("running epoch", i)
        run_epoch()
        n_epochs += 1
        log(save_dir, desc)
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        dh_model.load_state_dict(torch.load(path))
        predict(dataset, args.submission_dir)
        if args.analysis:
            rocstories_analysis(data_dir,
                                os.path.join(args.submission_dir, 'ROCStories.tsv'),
                                os.path.join(log_dir, 'rocstories.jsonl'))
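
For reference, a plausible invocation of the deleted script, using only flags
defined in its argparse block (paths are placeholders, and the commented-out
helper modules must be available for it to actually run):

    python run_openai_gpt.py --dataset rocstories --desc rocstories \
        --data_dir data/ --submit --analysis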