Commit 827c5194 (unverified)
Authored Oct 11, 2020 by Sam Shleifer; committed by GitHub on Oct 11, 2020

[examples] bump pl=0.9.0 (#7053)

Parent: ba4bbd92
Showing 7 changed files with 27 additions and 42 deletions (+27 -42).
Files changed:
- examples/lightning_base.py (+7 -6)
- examples/requirements.txt (+1 -1)
- examples/seq2seq/README.md (+14 -4)
- examples/seq2seq/distillation.py (+1 -25)
- examples/seq2seq/finetune.py (+1 -0)
- examples/seq2seq/test_seq2seq_examples.py (+1 -4)
- examples/test_examples.py (+2 -2)
examples/lightning_base.py

@@ -119,7 +119,7 @@ class BaseTransformer(pl.LightningModule):
     def get_lr_scheduler(self):
         get_schedule_func = arg_to_scheduler[self.hparams.lr_scheduler]
         scheduler = get_schedule_func(
-            self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps
+            self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps()
         )
         scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}
         return scheduler
@@ -159,19 +159,20 @@ class BaseTransformer(pl.LightningModule):
     def test_epoch_end(self, outputs):
         return self.validation_end(outputs)
 
-    @property
     def total_steps(self) -> int:
         """The number of total training steps that will be run. Used for lr scheduler purposes."""
         num_devices = max(1, self.hparams.gpus)  # TODO: consider num_tpu_cores
         effective_batch_size = self.hparams.train_batch_size * self.hparams.accumulate_grad_batches * num_devices
-        dataset_size = len(self.train_loader.dataset)
-        return (dataset_size / effective_batch_size) * self.hparams.max_epochs
+        return (self.dataset_size / effective_batch_size) * self.hparams.max_epochs
 
     def setup(self, mode):
-        if mode == "fit":
+        if mode == "test":
+            self.dataset_size = len(self.test_dataloader().dataset)
+        else:
             self.train_loader = self.get_dataloader("train", self.hparams.train_batch_size, shuffle=True)
+            self.dataset_size = len(self.train_loader.dataset)
 
-    def get_dataloader(self, type_path, batch_size, shuffle=False):
+    def get_dataloader(self, type_path: str, batch_size: int, shuffle: bool = False):
         raise NotImplementedError("You must implement this for your task")
 
     def train_dataloader(self):
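The first hunk switches `num_training_steps` to a method call, and the second hunk has `setup()` record `self.dataset_size` so that `total_steps()` no longer reads the dataloader directly. For readers unfamiliar with the scheduler dictionary Lightning expects, here is a minimal standalone sketch of the same wiring; the toy model, learning rate, and step counts are placeholders, not values taken from `lightning_base.py`.

```python
# Minimal sketch of the scheduler wiring used in get_lr_scheduler(); the model,
# learning rate, and step counts below are illustrative placeholders.
import torch
from transformers import get_linear_schedule_with_warmup

model = torch.nn.Linear(4, 2)  # stand-in for the real transformer
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)

total_steps = 1000  # lightning_base.py derives this from dataset_size, batch size, and max_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=100, num_training_steps=total_steps
)

# Lightning consumes the scheduler wrapped in a dict, stepping the learning rate
# every optimizer step rather than once per epoch.
lr_dict = {"scheduler": scheduler, "interval": "step", "frequency": 1}
```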
examples/requirements.txt

@@ -5,7 +5,7 @@ psutil
 sacrebleu
 rouge-score
 tensorflow_datasets
-pytorch-lightning==0.8.5
+pytorch-lightning==0.9.0
 matplotlib
 git-python==1.0.3
 faiss-cpu
examples/seq2seq/README.md

@@ -13,7 +13,6 @@ For `bertabs` instructions, see [`bertabs/README.md`](bertabs/README.md).
 - `FSMTForConditionalGeneration`
 - `T5ForConditionalGeneration`
-
 
 ## Datasets
 
 #### XSUM:
@@ -100,7 +99,7 @@ All finetuning bash scripts call finetune.py (or distillation.py) with reasonabl
 To see all the possible command line options, run:
 
 ```bash
 ./finetune.py --help
 ```
 
 ### Finetuning Training Params
@@ -265,6 +264,7 @@ export DATA_DIR=cnn_dm
     --fp16 \
     --bs 32
 ```
+
 ### Multi-GPU Evaluation
 here is a command to run xsum evaluation on 8 GPUS. It is more than linearly faster than run_eval.py in some cases
 because it uses SortishSampler to minimize padding. You can also use it on 1 GPU. `data_dir` must have
@@ -391,6 +391,17 @@ runtime: 13H on V-100 16GB GPU.
 pytest examples/seq2seq/
 ```
 
+### Converting pytorch-lightning checkpoints
+pytorch lightning ``-do_predict`` often fails, after you are done training, the best way to evaluate your model is to convert it.
+
+This should be done for you, with a file called `{save_dir}/best_tfmr`.
+
+If that file doesn't exist but you have a lightning `.ckpt` file, you can run
+```bash
+python convert_pl_checkpoint_to_hf.py PATH_TO_CKPT randomly_initialized_hf_model_path save_dir/best_tfmr
+```
+Then either `run_eval` or `run_distributed_eval` with `save_dir/best_tfmr` (see previous sections)
+
 ## Experimental Features
 These features are harder to use and not always useful.
@@ -419,4 +430,3 @@ uses 12,723 batches of length 48 and takes slightly more time 9.5 minutes.
 The feature is still experimental, because:
 + we can make it much more robust if we have memory mapped/preprocessed datasets.
 + The speedup over sortish sampler is not that large at the moment.
-
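As a rough illustration of what the conversion step in the new README section involves, the sketch below loads a lightning `.ckpt`, strips the LightningModule wrapper prefix, and saves a Hugging Face model directory. The real logic lives in `examples/seq2seq/convert_pl_checkpoint_to_hf.py`; the `model.` prefix and the `strict=False` load are assumptions made for this sketch, not a copy of the script.

```python
# Hedged sketch of a pytorch-lightning .ckpt -> Hugging Face directory conversion.
# The real logic lives in examples/seq2seq/convert_pl_checkpoint_to_hf.py; the
# "model." prefix assumption below is illustrative only.
import torch
from transformers import AutoModelForSeq2SeqLM

def pl_ckpt_to_hf_sketch(ckpt_path: str, hf_src_model_path: str, save_dir: str) -> None:
    ckpt = torch.load(ckpt_path, map_location="cpu")
    # Keep only the transformer weights, dropping the LightningModule wrapper prefix.
    state_dict = {
        k[len("model."):]: v
        for k, v in ckpt["state_dict"].items()
        if k.startswith("model.")
    }
    hf_model = AutoModelForSeq2SeqLM.from_pretrained(hf_src_model_path)
    hf_model.load_state_dict(state_dict, strict=False)
    hf_model.save_pretrained(save_dir)  # writes pytorch_model.bin and config.json into save_dir
```

After that, the saved directory can be passed to `run_eval` or `run_distributed_eval` like any other model path, as the README text above describes.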
examples/seq2seq/distillation.py

@@ -17,7 +17,7 @@ from finetune import main as ft_main
 from make_student import create_student_by_copying_alternating_layers, get_layers_to_supervise
 from transformers import AutoModelForSeq2SeqLM, MBartTokenizer, T5ForConditionalGeneration
 from transformers.modeling_bart import shift_tokens_right
-from utils import calculate_bleu, freeze_params, label_smoothed_nll_loss, pickle_load, use_task_specific_params
+from utils import calculate_bleu, freeze_params, label_smoothed_nll_loss, use_task_specific_params
 
 
 # need the parent dir module
@@ -264,30 +264,6 @@ def create_module(args):
     return model
 
 
-def evaluate_checkpoint(ckpt_path: Path, dest_dir=None):
-    # TODO(SS): DELETE? Better to convert_pl_ckpt_to_hf and run_eval.py
-    exp_dir = ckpt_path.parent
-    if dest_dir is None:
-        dest_dir = exp_dir
-    clash = list(dest_dir.glob("test_generations*"))
-    if clash:
-        print(f"SKIPPING to avoid overwriting {clash}")
-    ckpt = torch.load(ckpt_path, map_location="cpu")
-    if "hparams" in ckpt:
-        args = argparse.Namespace(**ckpt["hparams"])
-    else:
-        args = argparse.Namespace(**pickle_load(exp_dir / "hparams.pkl"))
-    args.resume_from_checkpoint = str(ckpt_path)
-    args.do_train = False
-    args.output_dir = str(dest_dir)
-    args.n_gpu = 1
-    args.eval_batch_size = 16
-    Path(args.output_dir).mkdir(exist_ok=True)
-    model = create_module(args)
-    trainer: pl.Trainer = generic_train(model, args, early_stopping_callback=False)
-    trainer.test(model)
-
-
 def distill_main(args):
     Path(args.output_dir).mkdir(exist_ok=True)
     if len(os.listdir(args.output_dir)) > 3 and args.do_train:
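The removed `evaluate_checkpoint` helper carried a TODO pointing at its replacement: convert the lightning checkpoint with `convert_pl_ckpt_to_hf` and then score it with `run_eval.py`. A hedged sketch of that two-step flow follows; every path is a placeholder and the `run_eval.py` argument order is an assumption, so check `python run_eval.py --help` before relying on it.

```python
# Hedged sketch of the workflow the removed evaluate_checkpoint() TODO recommends:
# convert the lightning checkpoint to a Hugging Face directory, then evaluate it.
# All paths below are placeholders, not values used anywhere in the repository.
import subprocess

ckpt_path = "distilbart_xsum/checkpointepoch=2.ckpt"  # hypothetical lightning checkpoint
src_model = "distilbart_xsum/student"                 # hypothetical randomly-initialized student dir
best_tfmr = "distilbart_xsum/best_tfmr"

subprocess.run(
    ["python", "convert_pl_checkpoint_to_hf.py", ckpt_path, src_model, best_tfmr],
    check=True,
)
# run_eval.py then scores the converted model; the argument order here is an
# assumption, so verify it against the script's --help output.
subprocess.run(
    ["python", "run_eval.py", best_tfmr, "xsum/test.source", "test_generations.txt"],
    check=True,
)
```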
examples/seq2seq/finetune.py

@@ -181,6 +181,7 @@ class SummarizationModule(BaseTransformer):
         return self._generative_step(batch)
 
     def validation_epoch_end(self, outputs, prefix="val") -> Dict:
         self.step_count += 1
         losses = {k: torch.stack([x[k] for x in outputs]).mean() for k in self.loss_names}
         loss = losses["loss"]
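For context on the hunk above, `validation_epoch_end` averages each logged scalar over the validation batches before picking out the loss. A standalone sketch of that aggregation, with made-up loss names and values:

```python
# Standalone sketch of the loss aggregation in validation_epoch_end(); the outputs
# list normally comes from Lightning, one dict per validation batch. Values are made up.
import torch

outputs = [
    {"loss": torch.tensor(2.1), "ce_loss": torch.tensor(1.9)},
    {"loss": torch.tensor(1.8), "ce_loss": torch.tensor(1.7)},
]
loss_names = ["loss", "ce_loss"]

# Stack each metric across batches and take the mean, exactly as in the hunk above.
losses = {k: torch.stack([x[k] for x in outputs]).mean() for k in loss_names}
loss = losses["loss"]
print(loss)  # tensor(1.9500)
```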
examples/seq2seq/test_seq2seq_examples.py

@@ -13,7 +13,7 @@ import torch
 import lightning_base
 from convert_pl_checkpoint_to_hf import convert_pl_to_hf
-from distillation import distill_main, evaluate_checkpoint
+from distillation import distill_main
 from finetune import SummarizationModule, main
 from run_eval import generate_summaries_or_translations, run_generate
 from run_eval_search import run_search
@@ -178,7 +178,6 @@ class TestSummarizationDistiller(unittest.TestCase):
         generate_summaries_or_translations(examples, out_path, str(model.output_dir / "best_tfmr"))
         self.assertTrue(Path(out_path).exists())
-        evaluate_checkpoint(ckpts[0], dest_dir=Path(tempfile.mkdtemp()))
 
         out_path_new = tempfile.mkdtemp()
         convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
         assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))
@@ -227,8 +226,6 @@ class TestSummarizationDistiller(unittest.TestCase):
         assert len(all_files) > 2
         self.assertEqual(len(transformer_ckpts), 2)
-        evaluate_checkpoint(ckpts[0], dest_dir=Path(tempfile.mkdtemp()))
-
 
     def test_distill_t5(self):
         updates = dict(
             student_encoder_layers=1,
examples/test_examples.py

@@ -116,8 +116,8 @@ class ExamplesTests(TestCasePlus):
             testargs.append("--fp16")
 
         with patch.object(sys, "argv", testargs):
-            result = run_pl_glue.main()
-            # for now just testing that the script can run to a completion
+            result = run_pl_glue.main()[0]
+            # for now just testing that the script can run to completion
             self.assertGreater(result["acc"], 0.25)
             #
             # TODO: this fails on CI - doesn't get acc/f1>=0.75:
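The added `[0]` indicates that `run_pl_glue.main()` now returns a list of result dicts rather than a single dict, so the test reads its metrics from the first entry. A tiny illustration of the indexing, with made-up numbers (not CI output):

```python
# Illustration only: main() is assumed to return a list of metric dicts under
# pytorch-lightning 0.9.0, so callers take the first element before asserting.
results = [{"acc": 0.31, "f1": 0.40}]  # made-up metrics, not CI output
result = results[0]
assert result["acc"] > 0.25
```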