"vscode:/vscode.git/clone" did not exist on "c81259d33a77f657bce9bd8ab0e3548826df258d"
Unverified Commit 83fd49b1 authored by Zhewen Li, committed by GitHub
Browse files

[CI/Build][Bugfix]Fix Quantized Models Test on AMD (#27712)


Signed-off-by: zhewenli <zhewenli@meta.com>
parent a4a4f0f6
...@@ -908,7 +908,7 @@ steps: ...@@ -908,7 +908,7 @@ steps:
- label: Quantized Models Test # 45 min - label: Quantized Models Test # 45 min
timeout_in_minutes: 60 timeout_in_minutes: 60
mirror_hardwares: [amdexperimental] mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1 agent_pool: mi325_1
# grade: Blocking # grade: Blocking
source_file_dependencies: source_file_dependencies:
......
...@@ -9,10 +9,16 @@ import pytest ...@@ -9,10 +9,16 @@ import pytest
from transformers import BitsAndBytesConfig from transformers import BitsAndBytesConfig
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from vllm.platforms import current_platform
from ...utils import compare_two_settings, multi_gpu_test from ...utils import compare_two_settings, multi_gpu_test
from ..utils import check_embeddings_close, check_logprobs_close from ..utils import check_embeddings_close, check_logprobs_close
pytestmark = pytest.mark.skipif(
current_platform.is_rocm(),
reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
)
models_4bit_to_test = [ models_4bit_to_test = [
("facebook/opt-125m", "quantize opt model inflight"), ("facebook/opt-125m", "quantize opt model inflight"),
( (
......
...@@ -413,7 +413,7 @@ class RocmPlatform(Platform): ...@@ -413,7 +413,7 @@ class RocmPlatform(Platform):
"Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ" "Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ"
" is not set, enabling VLLM_USE_TRITON_AWQ." " is not set, enabling VLLM_USE_TRITON_AWQ."
) )
envs.VLLM_USE_TRITON_AWQ = True os.environ["VLLM_USE_TRITON_AWQ"] = "1"
@classmethod @classmethod
def get_punica_wrapper(cls) -> str: def get_punica_wrapper(cls) -> str:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment