Unverified commit dc13c99e, authored by Christina Norman, committed by GitHub
Browse files

fix(gguf): Disable bfloat16 for GGUF on blackwell device (#30408)


Signed-off-by: Christina <truffle@gmail.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: Christina Norman <christina@example.com>
Co-authored-by: Isotr0py <isotr0py@users.noreply.github.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 3e34adcd
......@@ -33,6 +33,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
)
from vllm.model_executor.models.utils import WeightsMapper
from vllm.model_executor.utils import set_weight_attrs
from vllm.platforms import current_platform
from vllm.utils.torch_utils import direct_register_custom_op
logger = init_logger(__name__)
......@@ -52,6 +53,11 @@ class GGUFConfig(QuantizationConfig):
return "gguf"
def get_supported_act_dtypes(self) -> list[torch.dtype]:
    """Return the activation dtypes that may be used with GGUF weights.

    GGUF dequantization kernels work in half precision (fp16) internally,
    and bfloat16 shows precision issues on Blackwell devices. When the
    current platform passes the device-capability check for 100 (the
    check this method uses to detect Blackwell), bfloat16 is left out of
    the result and a one-time warning is logged.

    Returns:
        ``[torch.half, torch.float32]`` when the capability check passes,
        otherwise ``[torch.half, torch.bfloat16, torch.float32]``.
    """
    on_blackwell = current_platform.has_device_capability(100)

    # Common case first: no restriction, every dtype is offered.
    if not on_blackwell:
        return [torch.half, torch.bfloat16, torch.float32]

    # Blackwell: tell the user why bfloat16 is unavailable, then omit it.
    logger.warning_once("GGUF has precision issues with bfloat16 on Blackwell.")
    return [torch.half, torch.float32]
@classmethod
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment