Unverified Commit 827c5194 authored by Sam Shleifer, committed by GitHub

[examples] bump pl=0.9.0 (#7053)

parent ba4bbd92
@@ -119,7 +119,7 @@ class BaseTransformer(pl.LightningModule):
     def get_lr_scheduler(self):
         get_schedule_func = arg_to_scheduler[self.hparams.lr_scheduler]
         scheduler = get_schedule_func(
-            self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps
+            self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps()
         )
         scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}
         return scheduler
@@ -159,19 +159,20 @@ class BaseTransformer(pl.LightningModule):
     def test_epoch_end(self, outputs):
         return self.validation_end(outputs)

-    @property
     def total_steps(self) -> int:
         """The number of total training steps that will be run. Used for lr scheduler purposes."""
         num_devices = max(1, self.hparams.gpus)  # TODO: consider num_tpu_cores
         effective_batch_size = self.hparams.train_batch_size * self.hparams.accumulate_grad_batches * num_devices
-        dataset_size = len(self.train_loader.dataset)
-        return (dataset_size / effective_batch_size) * self.hparams.max_epochs
+        return (self.dataset_size / effective_batch_size) * self.hparams.max_epochs

     def setup(self, mode):
-        if mode == "fit":
+        if mode == "test":
+            self.dataset_size = len(self.test_dataloader().dataset)
+        else:
             self.train_loader = self.get_dataloader("train", self.hparams.train_batch_size, shuffle=True)
+            self.dataset_size = len(self.train_loader.dataset)

-    def get_dataloader(self, type_path, batch_size, shuffle=False):
+    def get_dataloader(self, type_path: str, batch_size: int, shuffle: bool = False):
         raise NotImplementedError("You must implement this for your task")

     def train_dataloader(self):
...
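The hunks above remove the `@property` decorator from `total_steps`, cache `dataset_size` inside `setup()`, and call `total_steps()` when building the LR scheduler. Below is a minimal standalone sketch of that pattern, not taken from the commit: the toy dataset, hyperparameter values, and the `TinyModule` class are made up for illustration (in the real code these live on a `pl.LightningModule` and read from `self.hparams`).

```python
# Sketch of the pattern introduced above: cache dataset_size in setup(),
# then derive the scheduler's num_training_steps from it.
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import get_linear_schedule_with_warmup


class TinyModule:
    def __init__(self, train_batch_size=8, accumulate_grad_batches=2, gpus=0, max_epochs=3, warmup_steps=10):
        self.train_batch_size = train_batch_size
        self.accumulate_grad_batches = accumulate_grad_batches
        self.gpus = gpus
        self.max_epochs = max_epochs
        self.warmup_steps = warmup_steps
        self.model = torch.nn.Linear(4, 2)
        self.opt = torch.optim.AdamW(self.model.parameters(), lr=3e-4)

    def setup(self, mode: str):
        # Mirror of the new setup(): record dataset_size up front so that
        # total_steps() no longer depends on a live train_loader attribute.
        data = TensorDataset(torch.randn(100, 4), torch.randint(0, 2, (100,)))
        self.train_loader = DataLoader(data, batch_size=self.train_batch_size, shuffle=True)
        self.dataset_size = len(self.train_loader.dataset)

    def total_steps(self) -> int:
        # Same formula as the diff: dataset_size / effective batch size * epochs.
        num_devices = max(1, self.gpus)
        effective_batch_size = self.train_batch_size * self.accumulate_grad_batches * num_devices
        return int(self.dataset_size / effective_batch_size) * self.max_epochs

    def get_lr_scheduler(self):
        # total_steps is now a plain method call, not a property access.
        return get_linear_schedule_with_warmup(
            self.opt, num_warmup_steps=self.warmup_steps, num_training_steps=self.total_steps()
        )


module = TinyModule()
module.setup("fit")
print(module.total_steps())   # 18 with the toy numbers above: int(100 / 16) * 3
scheduler = module.get_lr_scheduler()
```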
@@ -5,7 +5,7 @@ psutil
 sacrebleu
 rouge-score
 tensorflow_datasets
-pytorch-lightning==0.8.5
+pytorch-lightning==0.9.0
 matplotlib
 git-python==1.0.3
 faiss-cpu
...
@@ -12,7 +12,6 @@ For `bertabs` instructions, see [`bertabs/README.md`](bertabs/README.md).
 - `MBartForConditionalGeneration`
 - `FSMTForConditionalGeneration`
 - `T5ForConditionalGeneration`

 ## Datasets
@@ -100,7 +99,7 @@ All finetuning bash scripts call finetune.py (or distillation.py) with reasonabl
 To see all the possible command line options, run:

 ```bash
 ./finetune.py --help
 ```

 ### Finetuning Training Params
@@ -192,7 +191,7 @@ model = AutoModelForSeq2SeqLM.from_pretrained(f'{output_dir}/best_tfmr')
 ### Fine-tuning using Seq2SeqTrainer
 To use `Seq2SeqTrainer` for fine-tuning, use the `finetune_trainer.py` script. It subclasses `Trainer` to extend it for seq2seq training. Apart from the `Trainer`-related `TrainingArguments`, it shares the same argument names as `finetune.py`. One notable difference is that calculating generative metrics (BLEU, ROUGE) is optional and is controlled by the `--predict_with_generate` argument; set this argument to calculate BLEU and ROUGE metrics.
 With PyTorch 1.6+ it will automatically use `native AMP` when `--fp16` is set.

 To see all the possible command line options, run:
@@ -265,6 +264,7 @@ export DATA_DIR=cnn_dm
     --fp16 \
     --bs 32
 ```

 ### Multi-GPU Evaluation
 Here is a command to run xsum evaluation on 8 GPUs. It is more than linearly faster than run_eval.py in some cases
 because it uses SortishSampler to minimize padding. You can also use it on 1 GPU. `data_dir` must have
@@ -391,6 +391,17 @@ runtime: 13H on V-100 16GB GPU.
 pytest examples/seq2seq/
 ```

+### Converting pytorch-lightning checkpoints
+PyTorch Lightning's `--do_predict` often fails; after you are done training, the best way to evaluate your model is to convert it.
+
+This should already have been done for you, producing a directory called `{save_dir}/best_tfmr`.
+
+If that directory doesn't exist but you do have a Lightning `.ckpt` file, you can run
+```bash
+python convert_pl_checkpoint_to_hf.py PATH_TO_CKPT randomly_initialized_hf_model_path save_dir/best_tfmr
+```
+Then run either `run_eval` or `run_distributed_eval` with `save_dir/best_tfmr` (see previous sections).
+
 ## Experimental Features
 These features are harder to use and not always useful.
@@ -419,4 +430,3 @@ uses 12,723 batches of length 48 and takes slightly more time 9.5 minutes.
 The feature is still experimental, because:
 + we can make it much more robust if we have memory mapped/preprocessed datasets.
 + The speedup over sortish sampler is not that large at the moment.
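For the new checkpoint-conversion section above, here is a rough Python sketch of the same workflow driven from a script rather than the CLI. It reuses `convert_pl_to_hf`, the function inside `convert_pl_checkpoint_to_hf.py` (also imported by the test changes further down); the argument order follows the CLI example above, and the `save_dir`/`student` paths are placeholders, not values from this commit.

```python
# Sketch only: convert a Lightning .ckpt into a Hugging Face model directory
# and reload it. Run from examples/seq2seq so the local import resolves.
from pathlib import Path

from transformers import AutoModelForSeq2SeqLM

from convert_pl_checkpoint_to_hf import convert_pl_to_hf

save_dir = Path("save_dir")                      # placeholder output directory
ckpt_path = sorted(save_dir.glob("*.ckpt"))[-1]  # pick a Lightning checkpoint (last by name)
hf_src = save_dir / "student"                    # placeholder: the randomly initialized HF model dir
best_tfmr = save_dir / "best_tfmr"

# Same argument order as the CLI and the test file:
# (pl_ckpt_path, hf_src_model_dir, save_path).
convert_pl_to_hf(str(ckpt_path), str(hf_src), str(best_tfmr))

# The converted directory can now be loaded like any pretrained model.
model = AutoModelForSeq2SeqLM.from_pretrained(str(best_tfmr))
```

After conversion, point `run_eval.py` or `run_distributed_eval.py` at `save_dir/best_tfmr`, as the section above notes.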
@@ -17,7 +17,7 @@ from finetune import main as ft_main
 from make_student import create_student_by_copying_alternating_layers, get_layers_to_supervise
 from transformers import AutoModelForSeq2SeqLM, MBartTokenizer, T5ForConditionalGeneration
 from transformers.modeling_bart import shift_tokens_right
-from utils import calculate_bleu, freeze_params, label_smoothed_nll_loss, pickle_load, use_task_specific_params
+from utils import calculate_bleu, freeze_params, label_smoothed_nll_loss, use_task_specific_params


 # need the parent dir module
@@ -264,30 +264,6 @@ def create_module(args):
     return model


-def evaluate_checkpoint(ckpt_path: Path, dest_dir=None):
-    # TODO(SS): DELETE? Better to convert_pl_ckpt_to_hf and run_eval.py
-    exp_dir = ckpt_path.parent
-    if dest_dir is None:
-        dest_dir = exp_dir
-    clash = list(dest_dir.glob("test_generations*"))
-    if clash:
-        print(f"SKIPPING to avoid overwriting {clash}")
-    ckpt = torch.load(ckpt_path, map_location="cpu")
-    if "hparams" in ckpt:
-        args = argparse.Namespace(**ckpt["hparams"])
-    else:
-        args = argparse.Namespace(**pickle_load(exp_dir / "hparams.pkl"))
-    args.resume_from_checkpoint = str(ckpt_path)
-    args.do_train = False
-    args.output_dir = str(dest_dir)
-    args.n_gpu = 1
-    args.eval_batch_size = 16
-    Path(args.output_dir).mkdir(exist_ok=True)
-    model = create_module(args)
-    trainer: pl.Trainer = generic_train(model, args, early_stopping_callback=False)
-    trainer.test(model)
-
-
 def distill_main(args):
     Path(args.output_dir).mkdir(exist_ok=True)
     if len(os.listdir(args.output_dir)) > 3 and args.do_train:
...
@@ -181,6 +181,7 @@ class SummarizationModule(BaseTransformer):
         return self._generative_step(batch)

     def validation_epoch_end(self, outputs, prefix="val") -> Dict:
         self.step_count += 1
         losses = {k: torch.stack([x[k] for x in outputs]).mean() for k in self.loss_names}
         loss = losses["loss"]
...
@@ -13,7 +13,7 @@ import torch
 import lightning_base
 from convert_pl_checkpoint_to_hf import convert_pl_to_hf
-from distillation import distill_main, evaluate_checkpoint
+from distillation import distill_main
 from finetune import SummarizationModule, main
 from run_eval import generate_summaries_or_translations, run_generate
 from run_eval_search import run_search
@@ -178,7 +178,6 @@ class TestSummarizationDistiller(unittest.TestCase):
         generate_summaries_or_translations(examples, out_path, str(model.output_dir / "best_tfmr"))
         self.assertTrue(Path(out_path).exists())

-        evaluate_checkpoint(ckpts[0], dest_dir=Path(tempfile.mkdtemp()))
         out_path_new = tempfile.mkdtemp()
         convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
         assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))
@@ -227,8 +226,6 @@ class TestSummarizationDistiller(unittest.TestCase):
         assert len(all_files) > 2
         self.assertEqual(len(transformer_ckpts), 2)

-        evaluate_checkpoint(ckpts[0], dest_dir=Path(tempfile.mkdtemp()))
-
     def test_distill_t5(self):
         updates = dict(
             student_encoder_layers=1,
...
@@ -116,8 +116,8 @@ class ExamplesTests(TestCasePlus):
             testargs.append("--fp16")

         with patch.object(sys, "argv", testargs):
-            result = run_pl_glue.main()
-            # for now just testing that the script can run to a completion
+            result = run_pl_glue.main()[0]
+            # for now just testing that the script can run to completion
             self.assertGreater(result["acc"], 0.25)
             #
             # TODO: this fails on CI - doesn't get acc/f1>=0.75:
...