Unverified commit 83439012, authored by Sylvain Gugger, committed by GitHub

Fix all offload and MP tests (#17533)

parent 1c57242d
@@ -574,7 +574,6 @@ def _load_state_dict_into_meta_model(
     for param_name, param in state_dict.items():
         # First part of the test is always true as load_state_dict_keys always contains state_dict keys.
         if param_name not in loaded_state_dict_keys or param_name not in expected_keys:
-            print(param_name)
             continue
         if param_name.startswith(start_prefix):
@@ -2124,6 +2123,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin):
         if model._no_split_modules is None:
             raise ValueError(f"{model.__class__.__name__} does not support `device_map='auto'` yet.")
         no_split_modules = model._no_split_modules
+        # Make sure tied weights are tied before creating the device map.
+        model.tie_weights()
         device_map = infer_auto_device_map(
             model, no_split_module_classes=no_split_modules, dtype=torch_dtype, max_memory=max_memory
         )
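Why the hunk above matters: if tied weights (for instance input embeddings and an LM head) are not actually tied when the map is computed, `infer_auto_device_map` can count the shared tensor twice or place the two ends on different devices. A minimal sketch using the public `accelerate` API; the memory budget is an illustrative assumption, not taken from this diff:

    import torch
    from accelerate import infer_auto_device_map
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("gpt2")
    model.tie_weights()  # make lm_head share storage with the input embeddings
    device_map = infer_auto_device_map(
        model,
        max_memory={0: "200MB", "cpu": "2GB"},  # hypothetical budget for illustration
        no_split_module_classes=model._no_split_modules,
        dtype=torch.float32,
    )
    # With the tie in place, both ends of the shared weight resolve to a single
    # entry and therefore land on the same device in `device_map`.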
@@ -63,7 +63,7 @@ class OPTModelTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=2,
+        num_hidden_layers=5,
         num_attention_heads=4,
         intermediate_size=4,
         hidden_act="gelu",
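Why the tester goes from 2 to 5 layers: the offload tests split the model at several size thresholds, and `_no_split_modules` keeps each decoder block whole, so with only 2 blocks there are too few split points for distinct placements. A rough way to inspect the granularity, sketched with the same `accelerate` helper the tests use (the config values mirror the tester's; `ffn_dim` is `OPTConfig`'s name for the intermediate size):

    from accelerate.utils import compute_module_sizes
    from transformers import OPTConfig, OPTModel

    config = OPTConfig(
        vocab_size=99, hidden_size=16, num_hidden_layers=5, num_attention_heads=4, ffn_dim=4
    )
    model = OPTModel(config)
    sizes = compute_module_sizes(model)
    # Each decoder layer is an indivisible unit for the splitter, so more layers
    # mean finer-grained candidate splits under a max_memory budget.
    print(sizes[""], sizes["decoder.layers.0"])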
@@ -515,6 +515,8 @@ class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
     test_resize_embeddings = True
     test_model_parallel = True
     is_encoder_decoder = True
+    # The small T5 model needs higher percentages for CPU/MP tests
+    model_split_percents = [0.8, 0.9]

     def setUp(self):
         self.model_tester = T5ModelTester(self)
@@ -153,6 +153,7 @@ class ModelTesterMixin:
     test_model_parallel = False
     is_encoder_decoder = False
     has_attentions = True
+    model_split_percents = [0.5, 0.7, 0.9]

     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         inputs_dict = copy.deepcopy(inputs_dict)
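The new class attribute turns the hard-coded `[0.5, 0.7, 0.9]` list in the tests below into a per-model knob. A subclass only overrides it when the defaults don't fit, as the T5 test above does. A hypothetical example (class name and import path are illustrative, not from the diff):

    import unittest

    from tests.test_modeling_common import ModelTesterMixin  # path depends on repo layout


    class MyTinyModelTest(ModelTesterMixin, unittest.TestCase):
        # Hypothetical override: the smallest default split (0.5) would leave too
        # little room on the first device for this model's largest block.
        model_split_percents = [0.8, 0.9]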
@@ -2217,12 +2218,7 @@ class ModelTesterMixin:
     @require_accelerate
     @require_torch_gpu
     def test_disk_offload(self):
-        if all([model_class._no_split_modules is None for model_class in self.all_model_classes]):
-            return
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        if isinstance(getattr(config, "num_hidden_layers", None), int) and config.num_hidden_layers < 4:
-            config.num_hidden_layers = 4

         for model_class in self.all_model_classes:
             if model_class._no_split_modules is None:
@@ -2234,8 +2230,7 @@ class ModelTesterMixin:
             base_output = model(**inputs_dict)

             model_size = compute_module_sizes(model)[""]
-            # We test several splits of sizes to make sure it works.
-            max_size = int(0.4 * model_size)
+            max_size = int(self.model_split_percents[0] * model_size)
             with tempfile.TemporaryDirectory() as tmp_dir:
                 model.cpu().save_pretrained(tmp_dir)
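For orientation, the reload that follows this hunk in the test exercises disk offload roughly as sketched below; it reuses `model_class`, `tmp_dir`, `max_size`, `base_output`, and `inputs_dict` from the surrounding test, and the exact call is an approximation of the test body, not a verbatim quote:

    import tempfile

    import torch

    # Budget small enough that part of the model must spill to disk.
    max_memory = {0: max_size, "cpu": max_size}
    with tempfile.TemporaryDirectory() as offload_dir:
        new_model = model_class.from_pretrained(
            tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=offload_dir
        )
        new_output = new_model(**inputs_dict)
        assert torch.allclose(base_output[0], new_output[0])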
@@ -2256,12 +2251,7 @@ class ModelTesterMixin:
     @require_accelerate
     @require_torch_gpu
     def test_cpu_offload(self):
-        if all([model_class._no_split_modules is None for model_class in self.all_model_classes]):
-            return
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        if isinstance(getattr(config, "num_hidden_layers", None), int) and config.num_hidden_layers < 4:
-            config.num_hidden_layers = 4

         for model_class in self.all_model_classes:
             if model_class._no_split_modules is None:
@@ -2274,7 +2264,7 @@ class ModelTesterMixin:
             model_size = compute_module_sizes(model)[""]
             # We test several splits of sizes to make sure it works.
-            max_gpu_sizes = [int(p * model_size) for p in [0.5, 0.7, 0.9]]
+            max_gpu_sizes = [int(p * model_size) for p in self.model_split_percents]
             with tempfile.TemporaryDirectory() as tmp_dir:
                 model.cpu().save_pretrained(tmp_dir)
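With the attribute in place, each percentage yields one GPU budget and the remainder stays on CPU. Schematically, reusing the names in scope above and assuming `from_pretrained` receives the same `device_map="auto"` arguments the test uses:

    # One reload per percentage; weights that don't fit within `max_size` on
    # GPU 0 stay on CPU and are moved in by accelerate's hooks at forward time.
    for max_size in max_gpu_sizes:
        max_memory = {0: max_size, "cpu": model_size * 2}
        new_model = model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
        new_output = new_model(**inputs_dict)
        assert torch.allclose(base_output[0], new_output[0])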
@@ -2292,12 +2282,7 @@ class ModelTesterMixin:
     @require_accelerate
     @require_torch_multi_gpu
     def test_model_parallelism(self):
-        if all([model_class._no_split_modules is None for model_class in self.all_model_classes]):
-            return
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        if isinstance(getattr(config, "num_hidden_layers", None), int) and config.num_hidden_layers < 4:
-            config.num_hidden_layers = 4

         for model_class in self.all_model_classes:
             if model_class._no_split_modules is None:
@@ -2310,7 +2295,7 @@ class ModelTesterMixin:
             model_size = compute_module_sizes(model)[""]
             # We test several splits of sizes to make sure it works.
-            max_gpu_sizes = [int(p * model_size) for p in [0.5, 0.7, 0.9]]
+            max_gpu_sizes = [int(p * model_size) for p in self.model_split_percents]
             with tempfile.TemporaryDirectory() as tmp_dir:
                 model.cpu().save_pretrained(tmp_dir)
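Same pattern for two GPUs: the first device gets the capped budget and the overflow goes to the second, so the resulting `hf_device_map` should actually span both devices. A sketch reusing the names above; the device-map assertion is an illustrative extra, and input device placement is elided:

    for max_size in max_gpu_sizes:
        max_memory = {0: max_size, 1: model_size * 2, "cpu": model_size * 2}
        new_model = model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
        # Tighter budgets on GPU 0 should push later modules onto GPU 1.
        assert len(set(new_model.hf_device_map.values())) > 1
        assert torch.allclose(base_output[0], new_model(**inputs_dict)[0])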