Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
0540d360
Commit
0540d360
authored
Mar 20, 2019
by
Matthew Carrigan
Browse files
Fixed logging
parent
976554a4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
12 deletions
+14
-12
examples/lm_finetuning/finetune_on_pregenerated.py
examples/lm_finetuning/finetune_on_pregenerated.py
+14
-12
No files found.
examples/lm_finetuning/finetune_on_pregenerated.py
View file @
0540d360
...
...
@@ -16,7 +16,9 @@ from pytorch_pretrained_bert.tokenization import BertTokenizer
from
pytorch_pretrained_bert.optimization
import
BertAdam
,
warmup_linear
InputFeatures
=
namedtuple
(
"InputFeatures"
,
"input_ids input_mask segment_ids lm_label_ids is_next"
)
logger
=
logging
.
getLogger
(
__name__
)
log_format
=
'%(asctime)-10s: %(message)s'
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
log_format
)
def
convert_example_to_features
(
example
,
tokenizer
,
max_seq_length
):
...
...
@@ -68,7 +70,7 @@ class PregeneratedDataset(Dataset):
segment_ids
=
np
.
zeros
(
shape
=
(
num_samples
,
seq_len
),
dtype
=
np
.
bool
)
lm_label_ids
=
np
.
full
(
shape
=
(
num_samples
,
seq_len
),
dtype
=
np
.
int32
,
fill_value
=-
1
)
is_nexts
=
np
.
zeros
(
shape
=
(
num_samples
,),
dtype
=
np
.
bool
)
logg
er
.
info
(
f
"Loading training examples for epoch
{
epoch
}
"
)
logg
ing
.
info
(
f
"Loading training examples for epoch
{
epoch
}
"
)
with
data_file
.
open
()
as
f
:
for
i
,
line
in
enumerate
(
tqdm
(
f
,
total
=
num_samples
,
desc
=
"Training examples"
)):
example
=
json
.
loads
(
line
.
rstrip
())
...
...
@@ -79,7 +81,7 @@ class PregeneratedDataset(Dataset):
lm_label_ids
[
i
]
=
features
.
lm_label_ids
is_nexts
[
i
]
=
features
.
is_next
assert
i
==
num_samples
-
1
# Assert that the sample count metric was true
logg
er
.
info
(
"Loading complete!"
)
logg
ing
.
info
(
"Loading complete!"
)
self
.
num_samples
=
num_samples
self
.
seq_len
=
seq_len
self
.
input_ids
=
input_ids
...
...
@@ -132,8 +134,8 @@ def main():
action
=
'store_true'
,
help
=
"Whether to use 16-bit float precision instead of 32-bit"
)
parser
.
add_argument
(
'--loss_scale'
,
type
=
float
,
default
=
0
,
help
=
"Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.
\n
"
type
=
float
,
default
=
0
,
help
=
"Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.
\n
"
"0 (default value): dynamic loss scaling.
\n
"
"Positive power of 2: static loss scaling value.
\n
"
)
parser
.
add_argument
(
"--warmup_proportion"
,
...
...
@@ -179,7 +181,7 @@ def main():
n_gpu
=
1
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
logg
er
.
info
(
"device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}"
.
format
(
logg
ing
.
info
(
"device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}"
.
format
(
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
))
if
args
.
gradient_accumulation_steps
<
1
:
...
...
@@ -195,7 +197,7 @@ def main():
torch
.
cuda
.
manual_seed_all
(
args
.
seed
)
if
args
.
output_dir
.
is_dir
()
and
list
(
args
.
output_dir
.
iterdir
()):
logg
er
.
warning
(
f
"Output directory (
{
args
.
output_dir
}
) already exists and is not empty!"
)
logg
ing
.
warning
(
f
"Output directory (
{
args
.
output_dir
}
) already exists and is not empty!"
)
args
.
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
tokenizer
=
BertTokenizer
.
from_pretrained
(
args
.
bert_model
,
do_lower_case
=
args
.
do_lower_case
)
...
...
@@ -258,10 +260,10 @@ def main():
t_total
=
num_train_optimization_steps
)
global_step
=
0
logg
er
.
info
(
"***** Running training *****"
)
logg
er
.
info
(
f
" Num examples =
{
total_train_examples
}
"
)
logg
er
.
info
(
" Batch size = %d"
,
args
.
train_batch_size
)
logg
er
.
info
(
" Num steps = %d"
,
num_train_optimization_steps
)
logg
ing
.
info
(
"***** Running training *****"
)
logg
ing
.
info
(
f
" Num examples =
{
total_train_examples
}
"
)
logg
ing
.
info
(
" Batch size = %d"
,
args
.
train_batch_size
)
logg
ing
.
info
(
" Num steps = %d"
,
num_train_optimization_steps
)
model
.
train
()
for
epoch
in
range
(
args
.
epochs
):
epoch_dataset
=
PregeneratedDataset
(
epoch
=
epoch
,
training_path
=
args
.
pregenerated_data
,
tokenizer
=
tokenizer
,
...
...
@@ -304,7 +306,7 @@ def main():
global_step
+=
1
# Save a trained model
logg
er
.
info
(
"** ** * Saving fine-tuned model ** ** * "
)
logg
ing
.
info
(
"** ** * Saving fine-tuned model ** ** * "
)
model_to_save
=
model
.
module
if
hasattr
(
model
,
'module'
)
else
model
# Only save the model it-self
output_model_file
=
args
.
output_dir
/
"pytorch_model.bin"
torch
.
save
(
model_to_save
.
state_dict
(),
str
(
output_model_file
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment