Unverified commit e6d250e4, authored by Younes Belkada, committed by GitHub

[`core`] fix silent bug `keep_in_fp32` modules (#26589)

* fix silent bug `keep_in_fp32` modules

* final fix

* added a common test.

* Trigger CI

* revert
parent 19f0b7dd
@@ -693,7 +693,9 @@ def _load_state_dict_into_meta_model(
         if dtype is not None and torch.is_floating_point(param):
             if (
                 keep_in_fp32_modules is not None
-                and any(module_to_keep_in_fp32 in param_name for module_to_keep_in_fp32 in keep_in_fp32_modules)
+                and any(
+                    module_to_keep_in_fp32 in param_name.split(".") for module_to_keep_in_fp32 in keep_in_fp32_modules
+                )
                 and dtype == torch.float16
             ):
                 param = param.to(torch.float32)
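
The heart of the fix is visible in this hunk: the old check ran a plain substring test against the full parameter name, so a short entry such as T5's "wo" also matched any unrelated parameter whose dotted name merely contains those letters, silently keeping it in fp32. Splitting the name on "." restricts the match to exact path components. A minimal, self-contained sketch of the difference (the parameter names below are hypothetical):

keep_in_fp32_modules = ["wo"]  # e.g. T5's _keep_in_fp32_modules

intended = "encoder.block.0.layer.1.DenseReluDense.wo.weight"
unrelated = "decoder.two_layer_proj.weight"  # contains "wo" only as a substring


def old_match(name):
    # substring test against the whole name: over-matches
    return any(m in name for m in keep_in_fp32_modules)


def new_match(name):
    # exact match against the dotted path components
    return any(m in name.split(".") for m in keep_in_fp32_modules)


print(old_match(intended), old_match(unrelated))  # True True  <- silent false positive
print(new_match(intended), new_match(unrelated))  # True False
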
@@ -3534,7 +3536,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin):
                 if (
                     keep_in_fp32_modules is not None
                     and dtype == torch.float16
-                    and any(module_to_keep_in_fp32 in key for module_to_keep_in_fp32 in keep_in_fp32_modules)
+                    and any(
+                        module_to_keep_in_fp32 in key.split(".") for module_to_keep_in_fp32 in keep_in_fp32_modules
+                    )
                 ):
                     target_dtype = torch.float32
@@ -3561,7 +3565,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin):
         # Set some modules to fp32 if any
         if keep_in_fp32_modules is not None:
             for name, param in model.named_parameters():
-                if any(module_to_keep_in_fp32 in name for module_to_keep_in_fp32 in keep_in_fp32_modules):
+                if any(module_to_keep_in_fp32 in name.split(".") for module_to_keep_in_fp32 in keep_in_fp32_modules):
                     # param = param.to(torch.float32) does not work here as only in the local scope.
                     param.data = param.data.to(torch.float32)
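
The inline comment in this hunk deserves a word: rebinding the loop variable param only changes a local name and leaves the model untouched, whereas assigning to param.data swaps the underlying storage of the parameter the model actually holds. A standalone sketch of the difference:

import torch
from torch import nn

model = nn.Linear(4, 4).to(torch.float16)

for _, param in model.named_parameters():
    param = param.to(torch.float32)  # rebinds the local name only; the model is unchanged
print(next(model.parameters()).dtype)  # torch.float16

for _, param in model.named_parameters():
    param.data = param.data.to(torch.float32)  # mutates the parameter in place
print(next(model.parameters()).dtype)  # torch.float32
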
@@ -533,7 +533,7 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
     def test_inference_vicuna_7b(self):
         processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
         model = InstructBlipForConditionalGeneration.from_pretrained(
-            "Salesforce/instructblip-vicuna-7b", load_in_8bit=True
+            "Salesforce/instructblip-vicuna-7b", load_in_8bit=True, low_cpu_mem_usage=True
         )

         url = "https://raw.githubusercontent.com/salesforce/LAVIS/main/docs/_static/Confusing-Pictures.jpg"
@@ -569,6 +569,7 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
         model = InstructBlipForConditionalGeneration.from_pretrained(
             "Salesforce/instructblip-flan-t5-xl",
             torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True,
         ).to(torch_device)

         url = "https://raw.githubusercontent.com/salesforce/LAVIS/main/docs/_static/Confusing-Pictures.jpg"
@@ -275,6 +275,24 @@ class ModelTesterMixin:
             for p1, p2 in zip(model.parameters(), new_model.parameters()):
                 self.assertTrue(torch.equal(p1, p2))

+    def test_keep_in_fp32_modules(self):
+        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+        for model_class in self.all_model_classes:
+            if model_class._keep_in_fp32_modules is None:
+                return
+
+            model = model_class(config)
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                model.save_pretrained(tmpdirname)
+
+                model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16)
+
+                for name, param in model.named_parameters():
+                    if any(n in model_class._keep_in_fp32_modules for n in name.split(".")):
+                        self.assertTrue(param.dtype == torch.float32)
+                    else:
+                        self.assertTrue(param.dtype == torch.float16, name)
+
     def test_save_load_keys_to_ignore_on_save(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
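
For reference, a standalone version of what the new common test asserts, assuming a T5-style checkpoint (T5 declares _keep_in_fp32_modules = ["wo"]); within the repo the test itself runs as part of each model's suite, e.g. python -m pytest tests/models/t5 -k test_keep_in_fp32_modules:

import torch
from transformers import T5ForConditionalGeneration

model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16)
for name, param in model.named_parameters():
    if any(n in model._keep_in_fp32_modules for n in name.split(".")):
        assert param.dtype == torch.float32, name  # e.g. the DenseReluDense.wo weights
    else:
        assert param.dtype == torch.float16, name
print("all parameter dtypes match the expectation")
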