Unverified Commit 59e715f7 authored by kyo, committed by GitHub

Fix the `bitsandbytes` error formatting ("Some modules are dispatched on ...") (#30494)

Fix the formatting of the `bitsandbytes` error raised when some modules are dispatched on the CPU or the disk instead of being offloaded properly.
parent 19cfdf0f
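For context, the problem comes from how Python treats the two string forms: a triple-quoted literal keeps the source file's newlines and leading indentation inside the exception text, while adjacent string literals are concatenated at parse time into a single clean line. A minimal sketch of the difference (the variable names are illustrative, not from the patch):

```python
# Triple-quoted: the message carries raw newlines plus the indentation
# of the surrounding code, so the traceback renders as a ragged block.
before = """
    Some modules are dispatched on the CPU or the disk.
    Check the docs for more details.
    """

# Adjacent literals: the parser joins them into one string, so the
# traceback shows a single readable line.
after = (
    "Some modules are dispatched on the CPU or the disk. "
    "Check the docs for more details."
)

print(repr(before))  # '\n    Some modules are dispatched ...\n    '
print(repr(after))   # 'Some modules are dispatched ... more details.'
```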
```diff
@@ -84,14 +84,12 @@ class Bnb4BitHfQuantizer(HfQuantizer):
             }
             if "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
                 raise ValueError(
-                    """
-                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
-                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
-                    `from_pretrained`. Check
-                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
-                    for more details.
-                    """
+                    "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
+                    "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
+                    "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
+                    "`from_pretrained`. Check "
+                    "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
+                    "for more details. "
                 )
         if version.parse(importlib.metadata.version("bitsandbytes")) < version.parse("0.39.0"):
```
```diff
@@ -84,14 +84,12 @@ class Bnb8BitHfQuantizer(HfQuantizer):
             }
             if "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
                 raise ValueError(
-                    """
-                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
-                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
-                    `from_pretrained`. Check
-                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
-                    for more details.
-                    """
+                    "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
+                    "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
+                    "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
+                    "`from_pretrained`. Check "
+                    "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
+                    "for more details. "
                 )
         if version.parse(importlib.metadata.version("bitsandbytes")) < version.parse("0.37.2"):
```
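The remedy the error message points to looks roughly like the sketch below, following the linked offloading docs. Assumptions: the checkpoint and the module names in the `device_map` are placeholders (they depend on the architecture), and the example uses `llm_int8_enable_fp32_cpu_offload`, the offload flag `BitsAndBytesConfig` accepts:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Allow the modules left off the GPU to stay on the CPU in 32-bit
# instead of raising the ValueError above.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

# Hypothetical device_map: everything on GPU 0 except the lm_head,
# which is kept on the CPU in fp32. Module names vary by model.
device_map = {
    "transformer.word_embeddings": 0,
    "transformer.word_embeddings_layernorm": 0,
    "lm_head": "cpu",
    "transformer.h": 0,
    "transformer.ln_f": 0,
}

model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-1b7",  # placeholder checkpoint
    device_map=device_map,
    quantization_config=quantization_config,
)
```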