chenpangpang/transformers · Commit 827c5194 (unverified)

[examples] bump pl=0.9.0 (#7053)

Authored Oct 11, 2020 by Sam Shleifer; committed by GitHub on Oct 11, 2020.
Parent: ba4bbd92
Showing 7 changed files with 27 additions and 42 deletions (+27 / -42).
examples/lightning_base.py                 +7  -6
examples/requirements.txt                  +1  -1
examples/seq2seq/README.md                 +14 -4
examples/seq2seq/distillation.py           +1  -25
examples/seq2seq/finetune.py               +1  -0
examples/seq2seq/test_seq2seq_examples.py  +1  -4
examples/test_examples.py                  +2  -2
examples/lightning_base.py

@@ -119,7 +119,7 @@ class BaseTransformer(pl.LightningModule):
     def get_lr_scheduler(self):
         get_schedule_func = arg_to_scheduler[self.hparams.lr_scheduler]
         scheduler = get_schedule_func(
-            self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps
+            self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps()
         )
         scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}
         return scheduler
@@ -159,19 +159,20 @@ class BaseTransformer(pl.LightningModule):
     def test_epoch_end(self, outputs):
         return self.validation_end(outputs)

-    @property
     def total_steps(self) -> int:
         """The number of total training steps that will be run. Used for lr scheduler purposes."""
         num_devices = max(1, self.hparams.gpus)  # TODO: consider num_tpu_cores
         effective_batch_size = self.hparams.train_batch_size * self.hparams.accumulate_grad_batches * num_devices
-        dataset_size = len(self.train_loader.dataset)
-        return (dataset_size / effective_batch_size) * self.hparams.max_epochs
+        return (self.dataset_size / effective_batch_size) * self.hparams.max_epochs

     def setup(self, mode):
-        if mode == "fit":
+        if mode == "test":
+            self.dataset_size = len(self.test_dataloader().dataset)
+        else:
             self.train_loader = self.get_dataloader("train", self.hparams.train_batch_size, shuffle=True)
+            self.dataset_size = len(self.train_loader.dataset)

-    def get_dataloader(self, type_path, batch_size, shuffle=False):
+    def get_dataloader(self, type_path: str, batch_size: int, shuffle: bool = False):
         raise NotImplementedError("You must implement this for your task")

     def train_dataloader(self):
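With this change `total_steps` is an ordinary method that reads `self.dataset_size`, which `setup()` now records before training (or testing) starts. A minimal sketch of the arithmetic it performs, using made-up numbers rather than values from the diff:

```python
# Hypothetical numbers, only to illustrate how total_steps() is derived
# from the hparams referenced in the diff above.
dataset_size = 10_000            # what setup() would record from the train dataloader
train_batch_size = 8
accumulate_grad_batches = 2
gpus = 2
max_epochs = 3

num_devices = max(1, gpus)
effective_batch_size = train_batch_size * accumulate_grad_batches * num_devices  # 32
total_steps = (dataset_size / effective_batch_size) * max_epochs                 # 937.5
print(total_steps)  # passed to the scheduler as num_training_steps
```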
examples/requirements.txt

@@ -5,7 +5,7 @@ psutil
 sacrebleu
 rouge-score
 tensorflow_datasets
-pytorch-lightning==0.8.5
+pytorch-lightning==0.9.0
 matplotlib
 git-python==1.0.3
 faiss-cpu
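If you want to confirm that the bumped pin is what your environment actually uses, a quick sanity check (assuming a standard pip install of this requirements file):

```python
# Check the installed pytorch-lightning version against the new pin.
import pytorch_lightning as pl

print(pl.__version__)  # expected to print "0.9.0" with the pin above
assert pl.__version__.startswith("0.9"), "examples now assume pytorch-lightning 0.9.x"
```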
examples/seq2seq/README.md

@@ -13,7 +13,6 @@ For `bertabs` instructions, see [`bertabs/README.md`](bertabs/README.md).
 - `FSMTForConditionalGeneration`
 - `T5ForConditionalGeneration`

 ## Datasets

 #### XSUM:
@@ -100,7 +99,7 @@ All finetuning bash scripts call finetune.py (or distillation.py) with reasonable defaults
 To see all the possible command line options, run:
 ```bash
 ./finetune.py --help
 ```

 ### Finetuning Training Params
@@ -265,6 +264,7 @@ export DATA_DIR=cnn_dm
     --fp16 \
     --bs 32
 ```
 ### Multi-GPU Evaluation
 here is a command to run xsum evaluation on 8 GPUS. It is more than linearly faster than run_eval.py in some cases
 because it uses SortishSampler to minimize padding. You can also use it on 1 GPU. `data_dir` must have
@@ -391,6 +391,17 @@ runtime: 13H on V-100 16GB GPU.
 pytest examples/seq2seq/
 ```

+### Converting pytorch-lightning checkpoints
+pytorch lightning ``-do_predict`` often fails, after you are done training, the best way to evaluate your model is to convert it.
+
+This should be done for you, with a file called `{save_dir}/best_tfmr`.
+
+If that file doesn't exist but you have a lightning `.ckpt` file, you can run
+```bash
+python convert_pl_checkpoint_to_hf.py PATH_TO_CKPT randomly_initialized_hf_model_path save_dir/best_tfmr
+```
+Then either `run_eval` or `run_distributed_eval` with `save_dir/best_tfmr` (see previous sections)
+
 ## Experimental Features
 These features are harder to use and not always useful.
@@ -419,4 +430,3 @@ uses 12,723 batches of length 48 and takes slightly more time 9.5 minutes.
 The feature is still experimental, because:
 + we can make it much more robust if we have memory mapped/preprocessed datasets.
 + The speedup over sortish sampler is not that large at the moment.
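Once converted, `save_dir/best_tfmr` is a regular Hugging Face model directory, so besides `run_eval` it can be loaded directly with the standard `from_pretrained` API. A minimal sketch (the path and input text are placeholders, not taken from the commit):

```python
# Load a converted `best_tfmr` directory like any other Hugging Face seq2seq model.
# "save_dir/best_tfmr" is a placeholder path; substitute your own output_dir.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_dir = "save_dir/best_tfmr"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

batch = tokenizer(["Some long article to summarize ..."], return_tensors="pt", truncation=True)
summary_ids = model.generate(**batch, num_beams=4, max_length=60)
print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))
```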
examples/seq2seq/distillation.py

@@ -17,7 +17,7 @@ from finetune import main as ft_main
 from make_student import create_student_by_copying_alternating_layers, get_layers_to_supervise
 from transformers import AutoModelForSeq2SeqLM, MBartTokenizer, T5ForConditionalGeneration
 from transformers.modeling_bart import shift_tokens_right
-from utils import calculate_bleu, freeze_params, label_smoothed_nll_loss, pickle_load, use_task_specific_params
+from utils import calculate_bleu, freeze_params, label_smoothed_nll_loss, use_task_specific_params

 # need the parent dir module
@@ -264,30 +264,6 @@ def create_module(args):
     return model


-def evaluate_checkpoint(ckpt_path: Path, dest_dir=None):
-    # TODO(SS): DELETE? Better to convert_pl_ckpt_to_hf and run_eval.py
-    exp_dir = ckpt_path.parent
-    if dest_dir is None:
-        dest_dir = exp_dir
-    clash = list(dest_dir.glob("test_generations*"))
-    if clash:
-        print(f"SKIPPING to avoid overwriting {clash}")
-    ckpt = torch.load(ckpt_path, map_location="cpu")
-    if "hparams" in ckpt:
-        args = argparse.Namespace(**ckpt["hparams"])
-    else:
-        args = argparse.Namespace(**pickle_load(exp_dir / "hparams.pkl"))
-    args.resume_from_checkpoint = str(ckpt_path)
-    args.do_train = False
-    args.output_dir = str(dest_dir)
-    args.n_gpu = 1
-    args.eval_batch_size = 16
-    Path(args.output_dir).mkdir(exist_ok=True)
-    model = create_module(args)
-    trainer: pl.Trainer = generic_train(model, args, early_stopping_callback=False)
-    trainer.test(model)
-
-
 def distill_main(args):
     Path(args.output_dir).mkdir(exist_ok=True)
     if len(os.listdir(args.output_dir)) > 3 and args.do_train:
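The deleted `evaluate_checkpoint` helper is replaced by the workflow its own TODO suggested: convert the lightning `.ckpt` to a plain Hugging Face checkpoint and evaluate it with `run_eval.py`. A hedged sketch of what such a conversion typically does; the `model.` key-prefix handling is an assumption about how the example LightningModule wraps its Hugging Face model, and `pl_ckpt_to_hf` is a hypothetical name, not the repo's `convert_pl_to_hf`:

```python
# Sketch: turn a pytorch-lightning .ckpt into a Hugging Face model directory.
# Assumes the LightningModule stores the HF model under the attribute "model",
# so state_dict keys look like "model.<hf_param_name>".
import torch
from transformers import AutoModelForSeq2SeqLM


def pl_ckpt_to_hf(ckpt_path: str, hf_model_name_or_path: str, save_dir: str) -> None:
    ckpt = torch.load(ckpt_path, map_location="cpu")
    state_dict = {
        k[len("model."):]: v
        for k, v in ckpt["state_dict"].items()
        if k.startswith("model.")
    }
    hf_model = AutoModelForSeq2SeqLM.from_pretrained(hf_model_name_or_path)
    hf_model.load_state_dict(state_dict)
    hf_model.save_pretrained(save_dir)  # then evaluate the directory with run_eval.py
```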
examples/seq2seq/finetune.py

@@ -181,6 +181,7 @@ class SummarizationModule(BaseTransformer):
         return self._generative_step(batch)

     def validation_epoch_end(self, outputs, prefix="val") -> Dict:
         self.step_count += 1
         losses = {k: torch.stack([x[k] for x in outputs]).mean() for k in self.loss_names}
         loss = losses["loss"]
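The dict comprehension in `validation_epoch_end` is the usual pattern for averaging per-batch metrics over an epoch. A small self-contained illustration with made-up batch outputs (the metric names and values are hypothetical):

```python
# Averaging per-batch metric tensors across validation outputs,
# mirroring the comprehension in validation_epoch_end above.
import torch

loss_names = ["loss", "ce_loss"]          # hypothetical metric names
outputs = [                               # one dict per validation batch
    {"loss": torch.tensor(2.0), "ce_loss": torch.tensor(1.5)},
    {"loss": torch.tensor(1.0), "ce_loss": torch.tensor(0.5)},
]

losses = {k: torch.stack([x[k] for x in outputs]).mean() for k in loss_names}
print(losses)  # ~ {'loss': tensor(1.5), 'ce_loss': tensor(1.0)}
```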
examples/seq2seq/test_seq2seq_examples.py

@@ -13,7 +13,7 @@ import torch
 import lightning_base
 from convert_pl_checkpoint_to_hf import convert_pl_to_hf
-from distillation import distill_main, evaluate_checkpoint
+from distillation import distill_main
 from finetune import SummarizationModule, main
 from run_eval import generate_summaries_or_translations, run_generate
 from run_eval_search import run_search
@@ -178,7 +178,6 @@ class TestSummarizationDistiller(unittest.TestCase):
         generate_summaries_or_translations(examples, out_path, str(model.output_dir / "best_tfmr"))
         self.assertTrue(Path(out_path).exists())

-        evaluate_checkpoint(ckpts[0], dest_dir=Path(tempfile.mkdtemp()))
         out_path_new = tempfile.mkdtemp()
         convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
         assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))
@@ -227,8 +226,6 @@ class TestSummarizationDistiller(unittest.TestCase):
         assert len(all_files) > 2
         self.assertEqual(len(transformer_ckpts), 2)

-        evaluate_checkpoint(ckpts[0], dest_dir=Path(tempfile.mkdtemp()))
-
     def test_distill_t5(self):
         updates = dict(
             student_encoder_layers=1,
examples/test_examples.py

@@ -116,8 +116,8 @@ class ExamplesTests(TestCasePlus):
         testargs.append("--fp16")

         with patch.object(sys, "argv", testargs):
-            result = run_pl_glue.main()
-            # for now just testing that the script can run to a completion
+            result = run_pl_glue.main()[0]
+            # for now just testing that the script can run to completion
             self.assertGreater(result["acc"], 0.25)
 #
 # TODO: this fails on CI - doesn't get acc/f1>=0.75:
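This test drives the example script by temporarily swapping `sys.argv`, a common way to exercise an argparse-based `main()` without spawning a subprocess. A tiny standalone sketch of that pattern; the toy `main` below is hypothetical and only stands in for `run_pl_glue.main()`:

```python
# Minimal illustration of the sys.argv patching pattern used in the test above.
import argparse
import sys
from unittest.mock import patch


def main():
    # Toy argparse entry point standing in for the example script's main().
    parser = argparse.ArgumentParser()
    parser.add_argument("--fp16", action="store_true")
    args = parser.parse_args()
    return {"acc": 0.9, "fp16": args.fp16}


testargs = ["prog_name", "--fp16"]
with patch.object(sys, "argv", testargs):
    result = main()
assert result["acc"] > 0.25
```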