Commit 4599e05f authored by zhuwenwen
Browse files

skip AiterInt8ScaledMMLinearKernel

parent f509adcb
......@@ -7,9 +7,9 @@ from typing import TypeVar
import torch
from vllm.logger import init_logger
-from vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter import (
-AiterInt8ScaledMMLinearKernel,
-)
+# from vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter import (
+# AiterInt8ScaledMMLinearKernel,
+# )
from vllm.model_executor.layers.quantization.kernels.scaled_mm.cpu import (
CPUInt8ScaledMMLinearKernel,
)
......@@ -51,7 +51,7 @@ _POSSIBLE_INT8_KERNELS: dict[PlatformEnum, list[type[Int8ScaledMMLinearKernel]]]
CutlassInt8ScaledMMLinearKernel,
TritonInt8ScaledMMLinearKernel,
],
-PlatformEnum.ROCM: [AiterInt8ScaledMMLinearKernel, TritonInt8ScaledMMLinearKernel],
+PlatformEnum.ROCM: [TritonInt8ScaledMMLinearKernel], # AiterInt8ScaledMMLinearKernel
}
# in priority/performance order (when available)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment