Unverified Commit 59e715f7 authored by kyo, committed by GitHub

Fix the `bitsandbytes` error formatting ("Some modules are dispatched on ...") (#30494)

Fix the formatting of the `bitsandbytes` error raised when some modules are dispatched on the CPU or the disk instead of being offloaded properly.
parent 19cfdf0f
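For context, the problem comes from how Python treats the two string forms: a triple-quoted literal keeps the source file's newlines and leading indentation inside the exception text, while adjacent string literals are concatenated at parse time into a single clean line. A minimal sketch of the difference (the variable names are illustrative, not from the patch):

```python
# Triple-quoted: the message carries raw newlines plus the indentation
# of the surrounding code, so the traceback renders as a ragged block.
before = """
    Some modules are dispatched on the CPU or the disk.
    Check the docs for more details.
    """

# Adjacent literals: the parser joins them into one string, so the
# traceback shows a single readable line.
after = (
    "Some modules are dispatched on the CPU or the disk. "
    "Check the docs for more details."
)

print(repr(before))  # '\n    Some modules are dispatched ...\n    '
print(repr(after))   # 'Some modules are dispatched ... more details.'
```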
```diff
@@ -84,14 +84,12 @@ class Bnb4BitHfQuantizer(HfQuantizer):
             }
             if "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
                 raise ValueError(
-                    """
-                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
-                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
-                    `from_pretrained`. Check
-                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
-                    for more details.
-                    """
+                    "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
+                    "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
+                    "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
+                    "`from_pretrained`. Check "
+                    "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
+                    "for more details. "
                 )
         if version.parse(importlib.metadata.version("bitsandbytes")) < version.parse("0.39.0"):
```
```diff
@@ -84,14 +84,12 @@ class Bnb8BitHfQuantizer(HfQuantizer):
             }
             if "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
                 raise ValueError(
-                    """
-                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
-                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
-                    `from_pretrained`. Check
-                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
-                    for more details.
-                    """
+                    "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
+                    "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
+                    "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
+                    "`from_pretrained`. Check "
+                    "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
+                    "for more details. "
                 )
         if version.parse(importlib.metadata.version("bitsandbytes")) < version.parse("0.37.2"):
```
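The remedy the error message points to looks roughly like the sketch below, following the linked offloading docs. Assumptions: the checkpoint and the module names in the `device_map` are placeholders (they depend on the architecture), and the example uses `llm_int8_enable_fp32_cpu_offload`, the offload flag `BitsAndBytesConfig` accepts:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Allow the modules left off the GPU to stay on the CPU in 32-bit
# instead of raising the ValueError above.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

# Hypothetical device_map: everything on GPU 0 except the lm_head,
# which is kept on the CPU in fp32. Module names vary by model.
device_map = {
    "transformer.word_embeddings": 0,
    "transformer.word_embeddings_layernorm": 0,
    "lm_head": "cpu",
    "transformer.h": 0,
    "transformer.ln_f": 0,
}

model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-1b7",  # placeholder checkpoint
    device_map=device_map,
    quantization_config=quantization_config,
)
```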