Unverified Commit 96e47d92 authored by Sam Shleifer, committed by GitHub

[cleanup] assign todos, faster bart-cnn test (#7835)

* 2 beam output

* unassign/remove TODOs

* remove one more
parent 7b13bd01
@@ -291,7 +291,8 @@ class LoggingCallback(pl.Callback):
 def add_generic_args(parser, root_dir) -> None:
-    # TODO(SS): allow all pl args? parser = pl.Trainer.add_argparse_args(parser)
+    # To allow all pl args uncomment the following line
+    # parser = pl.Trainer.add_argparse_args(parser)
     parser.add_argument(
         "--output_dir",
         default=None,
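For context, a minimal sketch of what the newly commented-out line would do, assuming the argparse-based pytorch-lightning 1.x API (Trainer.add_argparse_args / Trainer.from_argparse_args):

import argparse

import pytorch_lightning as pl

parser = argparse.ArgumentParser()
# Register every pl.Trainer constructor argument (e.g. --max_epochs, --gpus)
# as a CLI flag on this parser.
parser = pl.Trainer.add_argparse_args(parser)
args = parser.parse_args(["--max_epochs", "3"])
# Build a Trainer directly from the parsed namespace.
trainer = pl.Trainer.from_argparse_args(args)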
@@ -21,7 +21,6 @@ from utils import load_json
 MODEL_NAME = MBART_TINY
-# TODO(SS): MODEL_NAME = "sshleifer/student_mbart_en_ro_1_1"
 MARIAN_MODEL = "sshleifer/student_marian_en_ro_6_1"
@@ -99,7 +98,7 @@ def test_train_mbart_cc25_enro_script():
     assert expected_key in ckpt["state_dict"]
     assert ckpt["state_dict"]["model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32
-    # TODO(SS): turn on args.do_predict when PL bug fixed.
+    # TODO: turn on args.do_predict when PL bug fixed.
     if args.do_predict:
         contents = {os.path.basename(p) for p in contents}
         assert "test_generations.txt" in contents
@@ -178,7 +177,7 @@ def test_opus_mt_distill_script():
     assert expected_key in ckpt["state_dict"]
     assert ckpt["state_dict"]["model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32
-    # TODO(SS): turn on args.do_predict when PL bug fixed.
+    # TODO: turn on args.do_predict when PL bug fixed.
     if args.do_predict:
         contents = {os.path.basename(p) for p in contents}
         assert "test_generations.txt" in contents
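For context, a hedged sketch of the checkpoint layout those assertions rely on: pytorch-lightning saves the LightningModule weights under the "state_dict" key, with parameter names prefixed by the module attribute path (here "model." for the wrapped transformers model). The checkpoint path below is hypothetical.

import torch

# Hypothetical path; the real tests discover the .ckpt file in output_dir.
ckpt = torch.load("output_dir/val.ckpt", map_location="cpu")
weight = ckpt["state_dict"]["model.model.decoder.layers.0.encoder_attn_layer_norm.weight"]
# The tests assert the saved weights are fp32.
assert weight.dtype == torch.float32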
@@ -25,7 +25,6 @@ def test_finetune_trainer():
 @slow
 def test_finetune_trainer_slow():
-    # TODO(SS): This will fail on devices with more than 1 GPU.
     # There is a missing call to __init__process_group somewhere
     output_dir = run_trainer(eval_steps=2, max_len="128", model_name=MARIAN_MODEL, num_train_epochs=3)
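For context, the "__init__process_group" comment refers to torch.distributed initialization: on a multi-GPU machine, each worker process must join a process group before any collective operation. A minimal sketch, assuming the standard env:// rendezvous:

import torch.distributed as dist

# Must run once per worker before any distributed collective;
# MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE come from the environment.
dist.init_process_group(backend="nccl", init_method="env://")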
@@ -52,7 +52,7 @@ CHEAP_ARGS = {
     "student_decoder_layers": 1,
     "val_check_interval": 1.0,
     "output_dir": "",
-    "fp16": False,  # TODO(SS): set this to CUDA_AVAILABLE if ci installs apex or start using native amp
+    "fp16": False,  # TODO: set this to CUDA_AVAILABLE if ci installs apex or start using native amp
     "no_teacher": False,
     "fp16_opt_level": "O1",
     "gpus": 1 if CUDA_AVAILABLE else 0,
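For context, a sketch of what that TODO proposes, assuming CUDA_AVAILABLE is derived from torch as it is elsewhere in these tests; the overrides dict name is hypothetical:

import torch

CUDA_AVAILABLE = torch.cuda.is_available()

# What the TODO suggests: enable fp16 only when a GPU (plus an amp
# backend such as apex or torch.cuda.amp) is actually usable.
cheap_args_overrides = {
    "fp16": CUDA_AVAILABLE,
    "gpus": 1 if CUDA_AVAILABLE else 0,
}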
@@ -54,8 +54,6 @@ def rename_state_dict_key(k):
 # See appendix C of paper for all hyperparams
-# TODO(SS): one constant
 def convert_pegasus(tf_weights: dict, cfg_updates: dict) -> PegasusForConditionalGeneration:
     cfg_kwargs = DEFAULTS.copy()
@@ -154,11 +154,8 @@ class PegasusTokenizer(ReformerTokenizer):
             return model_inputs
         if max_target_length is not None:
             tokenizer_kwargs["max_length"] = max_target_length
-        # TODO(@sshleifer): maybe tgt_texts = [self.pad_token + t for t in tgt_texts]  # add decoder_start_token_id
         labels: BatchEncoding = self(tgt_texts, **tokenizer_kwargs)["input_ids"]
         model_inputs["labels"] = labels
-        # for k, v in decoder_inputs.items():
-        #     model_inputs[f"decoder_{k}"] = v
         return model_inputs
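For context, a hedged sketch of the idea in the removed TODO: prepend the pad token string to each target text so the tokenized labels begin with Pegasus's decoder start token (Pegasus reuses the pad token as decoder_start_token_id). Names below are illustrative, not the method's actual locals:

# `tokenizer` stands in for the method's `self`; tgt_texts is its argument.
shifted = [tokenizer.pad_token + t for t in tgt_texts]
labels = tokenizer(shifted, padding=True, return_tensors="pt")["input_ids"]
# labels[:, 0] would then equal tokenizer.pad_token_id, the decoder start id.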
@@ -169,10 +166,6 @@ class PegasusTokenizerFast(ReformerTokenizerFast):
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
     slow_tokenizer_class = PegasusTokenizer
-    # def num_special_tokens_to_add(self, pair=False):
-    #     """Just EOS"""
-    #     return 1
     def _special_token_mask(self, seq):
         all_special_ids = set(self.all_special_ids)  # call it once instead of inside list comp
         all_special_ids.remove(self.unk_token_id)  # <unk> is only sometimes special
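For context, a hedged sketch of the behavior the deleted commented-out override described: Pegasus appends only EOS to a single sequence, so exactly one special token is added. The checkpoint name is illustrative.

from transformers import PegasusTokenizer

tok = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
# Only </s> is appended to a single sequence, hence one added token.
assert tok.num_special_tokens_to_add(pair=False) == 1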
@@ -236,9 +229,6 @@ class PegasusTokenizerFast(ReformerTokenizerFast):
             return model_inputs
         if max_target_length is not None:
             tokenizer_kwargs["max_length"] = max_target_length
-        # TODO(@sshleifer): maybe tgt_texts = [self.pad_token + t for t in tgt_texts]  # add decoder_start_token_id
         labels: BatchEncoding = self(tgt_texts, **tokenizer_kwargs)["input_ids"]
         model_inputs["labels"] = labels
-        # for k, v in decoder_inputs.items():
-        #     model_inputs[f"decoder_{k}"] = v
         return model_inputs
@@ -125,12 +125,9 @@ class FSMTModelTest(ModelTesterMixin, unittest.TestCase):
     all_model_classes = (FSMTModel, FSMTForConditionalGeneration) if is_torch_available() else ()
     all_generative_model_classes = (FSMTForConditionalGeneration,) if is_torch_available() else ()
     is_encoder_decoder = True
-    # TODO(SS): fix the below in a separate PR
     test_pruning = False
     test_torchscript = True
     test_head_masking = False
     test_resize_embeddings = True  # This requires inputs_dict['input_ids']
-    test_missing_keys = False  # because FSMTForConditionalGeneration and FSMTModel now have identical state_dict
+    test_missing_keys = False

     def setUp(self):
         self.model_tester = ModelTester(self)
@@ -326,7 +323,6 @@ class FSMTHeadTests(unittest.TestCase):
             max_length=max_length,
         )
         self.assertEqual(new_input_ids.shape, (input_ids.shape[0], max_length))
-        # TODO(SS): uneven length batches, empty inputs

     def test_shift_tokens_right(self):
         input_ids = torch.Tensor([[71, 82, 18, 33, 2, 1, 1], [68, 34, 26, 58, 30, 82, 2]]).long()
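For context, a sketch of the BART-style shift_tokens_right helper that test exercises, as it was implemented in this era of the codebase: the last non-pad token (usually EOS) wraps around to position 0 to serve as the decoder start token.

import torch

def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int) -> torch.Tensor:
    # Shift input ids one position to the right, moving the last
    # non-pad token (usually EOS) to index 0.
    prev_output_tokens = input_ids.clone()
    index_of_eos = (input_ids.ne(pad_token_id).sum(dim=1) - 1).unsqueeze(-1)
    prev_output_tokens[:, 0] = input_ids.gather(1, index_of_eos).squeeze()
    prev_output_tokens[:, 1:] = input_ids[:, :-1]
    return prev_output_tokens

# With pad_token_id=1, [71, 82, 18, 33, 2, 1, 1] becomes [2, 71, 82, 18, 33, 2, 1].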