Unverified Commit 285a4801 authored by Younes Belkada, committed by GitHub

Fix gradient checkpointing + fp16 autocast for most models (#24247)



* fix gc bug

* continue PoC on OPT

* fixes

* :exploding_head:

* fix tests

* remove pytest.mark

* fixup

* forward contrib credits from discussions

* forward contrib credits from discussions

* reverting changes on untouched files.

---------
Co-authored-by: zhaoqf123 <zhaoqf123@users.noreply.github.com>
Co-authored-by: 7eu7d7 <7eu7d7@users.noreply.github.com>
parent 1815d186
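The failure mode targeted by this PR is training with gradient checkpointing enabled while the forward pass runs under `torch.cuda.amp.autocast` in fp16: after `loss.backward()`, some parameters could end up with no gradient. Below is a minimal sketch of that scenario outside the test suite; the checkpoint name (`facebook/opt-125m`, chosen because the commits mention a proof of concept on OPT), the toy input, and the learning rate are illustrative assumptions, and a CUDA device is required.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed example checkpoint (the PR's proof of concept was done on OPT).
model_name = "facebook/opt-125m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")

model.gradient_checkpointing_enable()  # recompute activations during backward
model.train()

inputs = tokenizer("Hello world", return_tensors="pt").to("cuda")
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# fp16 autocast around the forward pass, as in the new common test below.
with torch.cuda.amp.autocast(dtype=torch.float16):
    loss = model(**inputs, labels=inputs["input_ids"]).loss

loss.backward()
optimizer.step()

# Before the fix, some parameters could end up with `grad is None` here.
for name, param in model.named_parameters():
    assert param.grad is not None, f"None gradient in param {name}"
```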
......@@ -279,6 +279,12 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass


def prepare_layoutlm_batch_inputs():
    # Here we prepare a batch of 2 sequences to test a LayoutLM forward pass on:
......
......@@ -275,6 +275,12 @@ class LiltModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in LILT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
......@@ -697,6 +697,12 @@ class LukeModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in LUKE_PRETRAINED_MODEL_ARCHIVE_LIST:
......
......@@ -263,6 +263,12 @@ class MarianModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_save_load_strict(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs()
        for model_class in self.all_model_classes:
......
......@@ -155,6 +155,12 @@ class OwlViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
    def test_inputs_embeds(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
......@@ -633,6 +639,12 @@ class OwlViTForObjectDetectionTest(ModelTesterMixin, unittest.TestCase):
    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def _create_and_check_torchscript(self, config, inputs_dict):
        if not self.test_torchscript:
            return
......
......@@ -280,6 +280,12 @@ class PegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
        self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
......
......@@ -332,6 +332,12 @@ class Pix2StructTextModelTest(ModelTesterMixin, unittest.TestCase):
    def test_training(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    @unittest.skip(reason="Training is tested directly on `Pix2StructTextImageModelTest`")
    def test_training_gradient_checkpointing(self):
        pass
......
......@@ -161,6 +161,12 @@ class RegNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    def test_model_common_attributes(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
......
......@@ -452,6 +452,12 @@ class RoFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_model_as_decoder_with_default_input_mask(self):
        # This regression test was failing with PyTorch < 1.3
        (
......
......@@ -421,6 +421,12 @@ class SamModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    @unittest.skip(reason="SamModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_from_base(self):
        pass
......
......@@ -324,6 +324,12 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest
    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_features = input_dict["input_features"]
......
......@@ -613,6 +613,12 @@ class SwitchTransformersModelTest(ModelTesterMixin, GenerationTesterMixin, Pipel
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    @slow
    def test_beam_sample_generate_dict_output(self):
        r"""
......
......@@ -200,6 +200,12 @@ class TimeSeriesTransformerModelTest(ModelTesterMixin, PipelineTesterMixin, unit
    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_save_load_strict(self):
        config, _ = self.model_tester.prepare_config_and_inputs()
        for model_class in self.all_model_classes:
......
......@@ -243,6 +243,12 @@ class VanModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
            model = VanModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass


# We will verify our results on an image of cute cats
def prepare_img():
......
......@@ -340,6 +340,12 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    def test_model_outputs_equivalence(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True
......
......@@ -549,6 +549,12 @@ class VisualBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_flickr()
        self.model_tester.create_and_check_for_flickr(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in VISUAL_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
......@@ -208,6 +208,12 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_pretraining(*config_and_inputs)

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    # overwrite from common since ViTMAEForPretraining has random masking, we need to fix the noise
    # to generate masks during test
    def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict):
......
......@@ -202,6 +202,12 @@ class XCLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
    def test_save_load_fast_init_to_base(self):
        pass

    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in XCLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
......@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import copy
import gc
......@@ -549,6 +548,41 @@ class ModelTesterMixin:
        loss = model(**inputs).loss
        loss.backward()

    @slow
    @require_torch_gpu
    def test_training_gradient_checkpointing_autocast(self):
        if not self.model_tester.is_training:
            return

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
            config.use_cache = False
            config.return_dict = True

            if (
                model_class.__name__
                in [*get_values(MODEL_MAPPING_NAMES), *get_values(MODEL_FOR_BACKBONE_MAPPING_NAMES)]
                or not model_class.supports_gradient_checkpointing
            ):
                continue

            model = model_class(config)
            model.to(torch_device)
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

            model.gradient_checkpointing_enable()
            model.train()

            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)

            with torch.cuda.amp.autocast(True, dtype=torch.float16):
                output = model(**inputs)[0]
                loss = output.mean()

            loss.backward()
            optimizer.step()

            for n, param in model.named_parameters():
                self.assertTrue(param.grad is not None, f"None gradient in param {n}")

    def test_attention_outputs(self):
        if not self.has_attentions:
            self.skipTest(reason="Model does not output attentions")
......
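The new common test is decorated with `@slow` and `@require_torch_gpu`, so it only runs on GPU machines in the slow suite (e.g. with `RUN_SLOW=1`), and it already skips base-mapping and backbone classes as well as models that do not set `supports_gradient_checkpointing`. Models that still cannot handle the GC + autocast + fp16 combination opt out explicitly, which is exactly what each per-model change above does. As a template (the class name below is hypothetical and not part of the diff; the real test classes also inherit `ModelTesterMixin`):

```python
import unittest


class MyNewModelTest(unittest.TestCase):  # hypothetical example class
    @unittest.skip(
        reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
    )
    def test_training_gradient_checkpointing_autocast(self):
        pass
```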