Unverified commit 814843e0, authored by Strahinja Stamenkovic, committed by GitHub
Browse files

Enable bitsandbytes quantization on AMD GPUs that use warp size 32 (#27307)


Signed-off-by: sstamenk <strahinja.stamenkovic@amd.com>
parent 20852c8f
...@@ -14,10 +14,13 @@ from vllm.platforms import current_platform ...@@ -14,10 +14,13 @@ from vllm.platforms import current_platform
from ...utils import compare_two_settings, multi_gpu_test from ...utils import compare_two_settings, multi_gpu_test
from ..utils import check_embeddings_close, check_logprobs_close from ..utils import check_embeddings_close, check_logprobs_close
pytestmark = pytest.mark.skipif( if current_platform.is_rocm():
current_platform.is_rocm(), from vllm.platforms.rocm import on_gfx9
reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
) pytestmark = pytest.mark.skipif(
on_gfx9(),
reason="bitsandbytes not supported on gfx9 (warp size 64 limitation)",
)
models_4bit_to_test = [ models_4bit_to_test = [
("facebook/opt-125m", "quantize opt model inflight"), ("facebook/opt-125m", "quantize opt model inflight"),
......
...@@ -185,6 +185,9 @@ class RocmPlatform(Platform): ...@@ -185,6 +185,9 @@ class RocmPlatform(Platform):
"petit_nvfp4", "petit_nvfp4",
"torchao", "torchao",
] ]
# bitsandbytes not supported on gfx9 (warp size 64 limitation)
if not on_gfx9():
supported_quantization += ["bitsandbytes"]
@classmethod @classmethod
def get_vit_attn_backend( def get_vit_attn_backend(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment