Fix trainer test wrt DeepSpeed + auto_find_bs (#29061)

* Fix trainer test

* Update tests/trainer/test_trainer.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

Fix trainer test wrt DeepSpeed + auto_find_bs (#29061)
* Fix trainer test

* Update tests/trainer/test_trainer.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
636b0324 · Zach Mueller · GitHub · 161fe425 · 636b0324
Unverified Commit 636b0324 authored Feb 16, 2024 by Zach Mueller Committed by GitHub Feb 16, 2024
Show whitespace changes
Inline Side-by-side

Showing with 4 additions and 12 deletions

tests/trainer/test_trainer.py tests/trainer/test_trainer.py +4 -12

No files found.
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1588,18 +1588,10 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
            auto_find_batch_size=True,
            deepspeed=deepspeed,
        )
-        trainer = Trainer(model, args, train_dataset=train_dataset, callbacks=[MockCudaOOMCallback()])
-        trainer.train()
-        # After `auto_find_batch_size` is ran we should now be at 8
-        self.assertEqual(trainer._train_batch_size, 8)
-
-        # We can then make a new Trainer
-        trainer = Trainer(model, args, train_dataset=train_dataset)
-        # Check we are at 16 to start
-        self.assertEqual(trainer._train_batch_size, 16 * max(trainer.args.n_gpu, 1))
-        trainer.train(resume_from_checkpoint=True)
-        # We should be back to 8 again, picking up based upon the last ran Trainer
-        self.assertEqual(trainer._train_batch_size, 8)
+        # Note: This can have issues, for now we don't support this functionality
+        # ref: https://github.com/huggingface/transformers/pull/29057
+        with self.assertRaises(NotImplementedError):
+            _ = Trainer(model, args, train_dataset=train_dataset, callbacks=[MockCudaOOMCallback()])

    def test_auto_batch_size_with_resume_from_checkpoint(self):
        train_dataset = RegressionDataset(length=128)