Unverified Commit 96e47d92 authored by Sam Shleifer, committed by GitHub

[cleanup] assign todos, faster bart-cnn test (#7835)

* 2 beam output

* unassign/remove TODOs

* remove one more
parent 7b13bd01
@@ -291,7 +291,8 @@ class LoggingCallback(pl.Callback):
 def add_generic_args(parser, root_dir) -> None:
-    # TODO(SS): allow all pl args? parser = pl.Trainer.add_argparse_args(parser)
+    # To allow all pl args uncomment the following line
+    # parser = pl.Trainer.add_argparse_args(parser)
     parser.add_argument(
         "--output_dir",
         default=None,
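For context, a minimal sketch of what the newly commented-out line would do, assuming the argparse-based pytorch-lightning 1.x API (Trainer.add_argparse_args / Trainer.from_argparse_args):

import argparse

import pytorch_lightning as pl

parser = argparse.ArgumentParser()
# Register every pl.Trainer constructor argument (e.g. --max_epochs, --gpus)
# as a CLI flag on this parser.
parser = pl.Trainer.add_argparse_args(parser)
args = parser.parse_args(["--max_epochs", "3"])
# Build a Trainer directly from the parsed namespace.
trainer = pl.Trainer.from_argparse_args(args)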
@@ -21,7 +21,6 @@ from utils import load_json
 MODEL_NAME = MBART_TINY
-# TODO(SS): MODEL_NAME = "sshleifer/student_mbart_en_ro_1_1"
 MARIAN_MODEL = "sshleifer/student_marian_en_ro_6_1"
@@ -99,7 +98,7 @@ def test_train_mbart_cc25_enro_script():
     assert expected_key in ckpt["state_dict"]
     assert ckpt["state_dict"]["model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32
-    # TODO(SS): turn on args.do_predict when PL bug fixed.
+    # TODO: turn on args.do_predict when PL bug fixed.
     if args.do_predict:
         contents = {os.path.basename(p) for p in contents}
         assert "test_generations.txt" in contents
@@ -178,7 +177,7 @@ def test_opus_mt_distill_script():
     assert expected_key in ckpt["state_dict"]
     assert ckpt["state_dict"]["model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32
-    # TODO(SS): turn on args.do_predict when PL bug fixed.
+    # TODO: turn on args.do_predict when PL bug fixed.
     if args.do_predict:
         contents = {os.path.basename(p) for p in contents}
         assert "test_generations.txt" in contents
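For context, a hedged sketch of the checkpoint layout those assertions rely on: pytorch-lightning saves the LightningModule weights under the "state_dict" key, with parameter names prefixed by the module attribute path (here "model." for the wrapped transformers model). The checkpoint path below is hypothetical.

import torch

# Hypothetical path; the real tests discover the .ckpt file in output_dir.
ckpt = torch.load("output_dir/val.ckpt", map_location="cpu")
weight = ckpt["state_dict"]["model.model.decoder.layers.0.encoder_attn_layer_norm.weight"]
# The tests assert the saved weights are fp32.
assert weight.dtype == torch.float32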
@@ -25,7 +25,6 @@ def test_finetune_trainer():
 @slow
 def test_finetune_trainer_slow():
-    # TODO(SS): This will fail on devices with more than 1 GPU.
     # There is a missing call to __init__process_group somewhere
     output_dir = run_trainer(eval_steps=2, max_len="128", model_name=MARIAN_MODEL, num_train_epochs=3)
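For context, the "__init__process_group" comment refers to torch.distributed initialization: on a multi-GPU machine, each worker process must join a process group before any collective operation. A minimal sketch, assuming the standard env:// rendezvous:

import torch.distributed as dist

# Must run once per worker before any distributed collective;
# MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE come from the environment.
dist.init_process_group(backend="nccl", init_method="env://")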
@@ -52,7 +52,7 @@ CHEAP_ARGS = {
     "student_decoder_layers": 1,
     "val_check_interval": 1.0,
     "output_dir": "",
-    "fp16": False,  # TODO(SS): set this to CUDA_AVAILABLE if ci installs apex or start using native amp
+    "fp16": False,  # TODO: set this to CUDA_AVAILABLE if ci installs apex or start using native amp
     "no_teacher": False,
     "fp16_opt_level": "O1",
     "gpus": 1 if CUDA_AVAILABLE else 0,
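For context, a sketch of what that TODO proposes, assuming CUDA_AVAILABLE is derived from torch as it is elsewhere in these tests; the overrides dict name is hypothetical:

import torch

CUDA_AVAILABLE = torch.cuda.is_available()

# What the TODO suggests: enable fp16 only when a GPU (plus an amp
# backend such as apex or torch.cuda.amp) is actually usable.
cheap_args_overrides = {
    "fp16": CUDA_AVAILABLE,
    "gpus": 1 if CUDA_AVAILABLE else 0,
}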
@@ -54,8 +54,6 @@ def rename_state_dict_key(k):
 # See appendix C of paper for all hyperparams
-# TODO(SS): one constant
 def convert_pegasus(tf_weights: dict, cfg_updates: dict) -> PegasusForConditionalGeneration:
     cfg_kwargs = DEFAULTS.copy()
@@ -154,11 +154,8 @@ class PegasusTokenizer(ReformerTokenizer):
             return model_inputs
         if max_target_length is not None:
             tokenizer_kwargs["max_length"] = max_target_length
-        # TODO(@sshleifer): maybe tgt_texts = [self.pad_token + t for t in tgt_texts]  # add decoder_start_token_id
         labels: BatchEncoding = self(tgt_texts, **tokenizer_kwargs)["input_ids"]
         model_inputs["labels"] = labels
-        # for k, v in decoder_inputs.items():
-        #     model_inputs[f"decoder_{k}"] = v
         return model_inputs
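For context, a hedged sketch of the idea in the removed TODO: prepend the pad token string to each target text so the tokenized labels begin with Pegasus's decoder start token (Pegasus reuses the pad token as decoder_start_token_id). Names below are illustrative, not the method's actual locals:

# `tokenizer` stands in for the method's `self`; tgt_texts is its argument.
shifted = [tokenizer.pad_token + t for t in tgt_texts]
labels = tokenizer(shifted, padding=True, return_tensors="pt")["input_ids"]
# labels[:, 0] would then equal tokenizer.pad_token_id, the decoder start id.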
@@ -169,10 +166,6 @@ class PegasusTokenizerFast(ReformerTokenizerFast):
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
     slow_tokenizer_class = PegasusTokenizer
-    # def num_special_tokens_to_add(self, pair=False):
-    #     """Just EOS"""
-    #     return 1
     def _special_token_mask(self, seq):
         all_special_ids = set(self.all_special_ids)  # call it once instead of inside list comp
         all_special_ids.remove(self.unk_token_id)  # <unk> is only sometimes special
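For context, a hedged sketch of the behavior the deleted commented-out override described: Pegasus appends only EOS to a single sequence, so exactly one special token is added. The checkpoint name is illustrative.

from transformers import PegasusTokenizer

tok = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
# Only </s> is appended to a single sequence, hence one added token.
assert tok.num_special_tokens_to_add(pair=False) == 1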
@@ -236,9 +229,6 @@ class PegasusTokenizerFast(ReformerTokenizerFast):
             return model_inputs
         if max_target_length is not None:
             tokenizer_kwargs["max_length"] = max_target_length
-        # TODO(@sshleifer): maybe tgt_texts = [self.pad_token + t for t in tgt_texts]  # add decoder_start_token_id
         labels: BatchEncoding = self(tgt_texts, **tokenizer_kwargs)["input_ids"]
         model_inputs["labels"] = labels
-        # for k, v in decoder_inputs.items():
-        #     model_inputs[f"decoder_{k}"] = v
         return model_inputs
@@ -125,12 +125,9 @@ class FSMTModelTest(ModelTesterMixin, unittest.TestCase):
     all_model_classes = (FSMTModel, FSMTForConditionalGeneration) if is_torch_available() else ()
     all_generative_model_classes = (FSMTForConditionalGeneration,) if is_torch_available() else ()
     is_encoder_decoder = True
-    # TODO(SS): fix the below in a separate PR
     test_pruning = False
     test_torchscript = True
     test_head_masking = False
     test_resize_embeddings = True  # This requires inputs_dict['input_ids']
-    test_missing_keys = False  # because FSMTForConditionalGeneration and FSMTModel now have identical state_dict
+    test_missing_keys = False

     def setUp(self):
         self.model_tester = ModelTester(self)
@@ -326,7 +323,6 @@ class FSMTHeadTests(unittest.TestCase):
             max_length=max_length,
         )
         self.assertEqual(new_input_ids.shape, (input_ids.shape[0], max_length))
-        # TODO(SS): uneven length batches, empty inputs

     def test_shift_tokens_right(self):
         input_ids = torch.Tensor([[71, 82, 18, 33, 2, 1, 1], [68, 34, 26, 58, 30, 82, 2]]).long()
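For context, a sketch of the BART-style shift_tokens_right helper that test exercises, as it was implemented in this era of the codebase: the last non-pad token (usually EOS) wraps around to position 0 to serve as the decoder start token.

import torch

def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int) -> torch.Tensor:
    # Shift input ids one position to the right, moving the last
    # non-pad token (usually EOS) to index 0.
    prev_output_tokens = input_ids.clone()
    index_of_eos = (input_ids.ne(pad_token_id).sum(dim=1) - 1).unsqueeze(-1)
    prev_output_tokens[:, 0] = input_ids.gather(1, index_of_eos).squeeze()
    prev_output_tokens[:, 1:] = input_ids[:, :-1]
    return prev_output_tokens

# With pad_token_id=1, [71, 82, 18, 33, 2, 1, 1] becomes [2, 71, 82, 18, 33, 2, 1].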