Revert low cpu mem tie weights (#29135)

* Revert "Add tie_weights() to LM heads and set bias in set_output_embeddings() (#28948)"

  This reverts commit 725f4ad1.

* Revert "Patch to skip failing `test_save_load_low_cpu_mem_usage` tests (#29043)"

  This reverts commit 4156f517.

Revert low cpu mem tie weights (#29135)
* Revert "Add tie_weights() to LM heads and set bias in set_output_embeddings() (#28948)"

  This reverts commit 725f4ad1.

* Revert "Patch to skip failing `test_save_load_low_cpu_mem_usage` tests (#29043)"

  This reverts commit 4156f517.
0996a100 · amyeroberts · GitHub · 15cfe389 · 0996a100 · 0996a100
Unverified commit 0996a100, authored Feb 20, 2024 by amyeroberts; committed by GitHub on Feb 20, 2024.
6 changed files
--- a/tests/models/fsmt/test_modeling_fsmt.py
+++ b/tests/models/fsmt/test_modeling_fsmt.py
@@ -329,12 +329,6 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
    def test_resize_embeddings_untied(self):
        pass
-    @unittest.skip(
-        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
-    )
-    def test_save_load_low_cpu_mem_usage(self):
-        pass
 @require_torch
 class FSMTHeadTests(unittest.TestCase):

--- a/tests/models/marian/test_modeling_marian.py
+++ b/tests/models/marian/test_modeling_marian.py
@@ -372,12 +372,6 @@ class MarianModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
    def test_training_gradient_checkpointing_use_reentrant_false(self):
        pass
-    @unittest.skip(
-        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
-    )
-    def test_save_load_low_cpu_mem_usage(self):
-        pass
 def assert_tensors_close(a, b, atol=1e-12, prefix=""):
    """If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""

--- a/tests/models/musicgen/test_modeling_musicgen.py
+++ b/tests/models/musicgen/test_modeling_musicgen.py
@@ -1144,10 +1144,6 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
            self.assertNotIn(config.pad_token_id, output_generate)
-    @unittest.skip("Fails with - TypeError: _weight_norm_interface() missing 1 required positional argument: 'dim'")
-    def test_save_load_low_cpu_mem_usage(self):
-        pass
 def get_bip_bip(bip_duration=0.125, duration=0.5, sample_rate=32000):
    """Produces a series of 'bip bip' sounds at a given frequency."""

--- a/tests/models/reformer/test_modeling_reformer.py
+++ b/tests/models/reformer/test_modeling_reformer.py
@@ -687,12 +687,6 @@ class ReformerLocalAttnModelTest(ReformerTesterMixin, GenerationTesterMixin, Mod
    def test_left_padding_compatibility(self):
        pass
-    @unittest.skip(
-        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
-    )
-    def test_save_load_low_cpu_mem_usage(self):
-        pass
 @require_torch
 class ReformerLSHAttnModelTest(
@@ -854,12 +848,6 @@ class ReformerLSHAttnModelTest(
    def test_left_padding_compatibility(self):
        pass
-    @unittest.skip(
-        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
-    )
-    def test_save_load_low_cpu_mem_usage(self):
-        pass
 @require_torch
 @require_sentencepiece

--- a/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py
+++ b/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py
@@ -515,12 +515,6 @@ class XLMRobertaXLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTes
        self.assertEqual(position_ids.shape, expected_positions.shape)
        self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
-    @unittest.skip(
-        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
-    )
-    def test_save_load_low_cpu_mem_usage(self):
-        pass
 @require_torch
 class XLMRobertaModelXLIntegrationTest(unittest.TestCase):

--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -435,23 +435,6 @@ class ModelTesterMixin:
                        max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item()
                    self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
-    def test_save_load_low_cpu_mem_usage(self):
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            for model_class in self.all_model_classes:
-                config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-                model_to_save = model_class(config)
-                model_to_save.save_pretrained(tmpdirname)
-                model = model_class.from_pretrained(
-                    tmpdirname,
-                    low_cpu_mem_usage=True,
-                )
-                # The low_cpu_mem_usage=True causes the model params to be initialized with device=meta. If there are
-                # any unloaded or untied parameters, then trying to move it to device=torch_device will throw an error.
-                model.to(torch_device)
    def test_fast_init_context_manager(self):
        # 1. Create a dummy class. Should have buffers as well? To make sure we test __init__
        class MyClass(PreTrainedModel):