chenpangpang / transformers
"src/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "8b632e0a0814983afcf4f9bb774a7935ce71e60f"
Commit 0540d360 authored Mar 20, 2019 by Matthew Carrigan

Fixed logging

parent 976554a4
Showing 1 changed file with 14 additions and 12 deletions
examples/lm_finetuning/finetune_on_pregenerated.py  +14 -12
@@ -16,7 +16,9 @@ from pytorch_pretrained_bert.tokenization import BertTokenizer
 from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
 
 InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
 
 logger = logging.getLogger(__name__)
+log_format = '%(asctime)-10s: %(message)s'
+logging.basicConfig(level=logging.INFO, format=log_format)
 
 def convert_example_to_features(example, tokenizer, max_seq_length):
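The two added lines configure Python's root logger. Without that configuration, INFO-level messages sent through the module-level logger are silently dropped, which is presumably what this commit fixes. A stand-alone sketch of the before/after behaviour (not part of the diff; the messages are illustrative):

import logging

logger = logging.getLogger(__name__)
logger.info("dropped: no handler is configured and INFO is below the default WARNING level")

# What the commit adds: configure the root logger once, then log through it.
log_format = '%(asctime)-10s: %(message)s'
logging.basicConfig(level=logging.INFO, format=log_format)
logging.info("printed with a timestamp prefix, e.g. '2019-03-20 12:34:56,789: ...'")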
@@ -68,7 +70,7 @@ class PregeneratedDataset(Dataset):
         segment_ids = np.zeros(shape=(num_samples, seq_len), dtype=np.bool)
         lm_label_ids = np.full(shape=(num_samples, seq_len), dtype=np.int32, fill_value=-1)
         is_nexts = np.zeros(shape=(num_samples,), dtype=np.bool)
-        logger.info(f"Loading training examples for epoch {epoch}")
+        logging.info(f"Loading training examples for epoch {epoch}")
         with data_file.open() as f:
             for i, line in enumerate(tqdm(f, total=num_samples, desc="Training examples")):
                 example = json.loads(line.rstrip())
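For readers skimming the context lines: the dataset preallocates NumPy arrays for every pregenerated example, with lm_label_ids filled with -1, conventionally the value a masked-LM loss ignores. A tiny runnable sketch of that preallocation; the shapes here are made up, the real script derives num_samples and seq_len from its pregenerated data:

import numpy as np

num_samples, seq_len = 2, 4   # illustrative shapes only
lm_label_ids = np.full(shape=(num_samples, seq_len), dtype=np.int32, fill_value=-1)
is_nexts = np.zeros(shape=(num_samples,), dtype=bool)
print(lm_label_ids)   # every position starts as -1, i.e. "no masked-LM label here"
print(is_nexts)       # [False False]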
@@ -79,7 +81,7 @@ class PregeneratedDataset(Dataset):
                 lm_label_ids[i] = features.lm_label_ids
                 is_nexts[i] = features.is_next
         assert i == num_samples - 1  # Assert that the sample count metric was true
-        logger.info("Loading complete!")
+        logging.info("Loading complete!")
         self.num_samples = num_samples
         self.seq_len = seq_len
         self.input_ids = input_ids
@@ -132,8 +134,8 @@ def main():
                         action='store_true',
                         help="Whether to use 16-bit float precision instead of 32-bit")
     parser.add_argument('--loss_scale',
-                        type=float, default=0,
-                        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
+                        type=float, default=0,
+                        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                         "0 (default value): dynamic loss scaling.\n"
                         "Positive power of 2: static loss scaling value.\n")
     parser.add_argument("--warmup_proportion",
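The --loss_scale help text describes the two fp16 modes: 0 requests dynamic loss scaling, while a positive power of 2 is used as a fixed multiplier. A self-contained sketch of what a static loss scale does; the model, optimizer, and values below are illustrative and not taken from this script:

import torch

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_scale = 128.0  # "Positive power of 2: static loss scaling value"

x, y = torch.randn(8, 4), torch.randn(8, 1)
loss = torch.nn.functional.mse_loss(model(x), y)
(loss * loss_scale).backward()   # scaled backward pass avoids fp16 gradient underflow
for p in model.parameters():
    p.grad.div_(loss_scale)      # restore the true gradient magnitude before stepping
optimizer.step()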
@@ -179,7 +181,7 @@ def main():
         n_gpu = 1
         # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
         torch.distributed.init_process_group(backend='nccl')
-    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
+    logging.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
         device, n_gpu, bool(args.local_rank != -1), args.fp16))
 
     if args.gradient_accumulation_steps < 1:
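The changed line only swaps logger for logging; the values it reports come from the usual local_rank-based device selection in these example scripts. A rough, runnable sketch of that pattern, not a verbatim excerpt, with illustrative values for local_rank and fp16:

import logging
import torch

logging.basicConfig(level=logging.INFO, format='%(asctime)-10s: %(message)s')

local_rank, fp16 = -1, False          # -1 means single-machine, non-distributed training
if local_rank == -1:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
else:
    torch.cuda.set_device(local_rank)
    device = torch.device("cuda", local_rank)
    n_gpu = 1
    # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
    torch.distributed.init_process_group(backend='nccl')
logging.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
    device, n_gpu, bool(local_rank != -1), fp16))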
@@ -195,7 +197,7 @@ def main():
         torch.cuda.manual_seed_all(args.seed)
 
     if args.output_dir.is_dir() and list(args.output_dir.iterdir()):
-        logger.warning(f"Output directory ({args.output_dir}) already exists and is not empty!")
+        logging.warning(f"Output directory ({args.output_dir}) already exists and is not empty!")
     args.output_dir.mkdir(parents=True, exist_ok=True)
 
     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
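The surrounding context treats args.output_dir as a pathlib.Path (it is checked with is_dir()/iterdir() and later joined with the / operator). A small runnable sketch of the same warn-then-create pattern; the directory name "finetuned_lm" is hypothetical:

import logging
from pathlib import Path

logging.basicConfig(level=logging.INFO, format='%(asctime)-10s: %(message)s')

output_dir = Path("finetuned_lm")     # hypothetical path, for illustration only
if output_dir.is_dir() and list(output_dir.iterdir()):
    logging.warning(f"Output directory ({output_dir}) already exists and is not empty!")
output_dir.mkdir(parents=True, exist_ok=True)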
@@ -258,10 +260,10 @@ def main():
                          t_total=num_train_optimization_steps)
 
     global_step = 0
-    logger.info("***** Running training *****")
-    logger.info(f" Num examples = {total_train_examples}")
-    logger.info(" Batch size = %d", args.train_batch_size)
-    logger.info(" Num steps = %d", num_train_optimization_steps)
+    logging.info("***** Running training *****")
+    logging.info(f" Num examples = {total_train_examples}")
+    logging.info(" Batch size = %d", args.train_batch_size)
+    logging.info(" Num steps = %d", num_train_optimization_steps)
     model.train()
     for epoch in range(args.epochs):
         epoch_dataset = PregeneratedDataset(epoch=epoch, training_path=args.pregenerated_data, tokenizer=tokenizer,
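Note that the replacement lines keep the script's mix of two formatting styles: an eager f-string for the example count and logging's lazy %-style arguments for the batch size and step count. Both produce the same kind of record; the %-style defers string formatting until the record is actually emitted. A short illustration with made-up values:

import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)-10s: %(message)s')

total_train_examples, train_batch_size = 100000, 32    # made-up values
logging.info(f" Num examples = {total_train_examples}")    # eager f-string formatting
logging.info(" Batch size = %d", train_batch_size)         # lazy %-style formatting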
@@ -304,7 +306,7 @@ def main():
             global_step += 1
 
     # Save a trained model
-    logger.info("** ** * Saving fine-tuned model ** ** * ")
+    logging.info("** ** * Saving fine-tuned model ** ** * ")
     model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
     output_model_file = args.output_dir / "pytorch_model.bin"
     torch.save(model_to_save.state_dict(), str(output_model_file))
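Since the model is saved as a raw state_dict, reloading it later follows the usual PyTorch pattern. A hedged sketch, assuming the run was fine-tuned from bert-base-uncased and wrote to a hypothetical "finetuned_lm" output directory (mirroring output_dir / "pytorch_model.bin" above):

import torch
from pytorch_pretrained_bert.modeling import BertForPreTraining

# Rebuild the architecture, then load the fine-tuned weights saved by this script.
model = BertForPreTraining.from_pretrained("bert-base-uncased")
state_dict = torch.load("finetuned_lm/pytorch_model.bin", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()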