Commit 4599e05f authored by zhuwenwen
Browse files

skip AiterInt8ScaledMMLinearKernel

parent f509adcb
...@@ -7,9 +7,9 @@ from typing import TypeVar ...@@ -7,9 +7,9 @@ from typing import TypeVar
import torch import torch
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter import ( # from vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter import (
AiterInt8ScaledMMLinearKernel, # AiterInt8ScaledMMLinearKernel,
) # )
from vllm.model_executor.layers.quantization.kernels.scaled_mm.cpu import ( from vllm.model_executor.layers.quantization.kernels.scaled_mm.cpu import (
CPUInt8ScaledMMLinearKernel, CPUInt8ScaledMMLinearKernel,
) )
...@@ -51,7 +51,7 @@ _POSSIBLE_INT8_KERNELS: dict[PlatformEnum, list[type[Int8ScaledMMLinearKernel]]] ...@@ -51,7 +51,7 @@ _POSSIBLE_INT8_KERNELS: dict[PlatformEnum, list[type[Int8ScaledMMLinearKernel]]]
CutlassInt8ScaledMMLinearKernel, CutlassInt8ScaledMMLinearKernel,
TritonInt8ScaledMMLinearKernel, TritonInt8ScaledMMLinearKernel,
], ],
PlatformEnum.ROCM: [AiterInt8ScaledMMLinearKernel, TritonInt8ScaledMMLinearKernel], PlatformEnum.ROCM: [TritonInt8ScaledMMLinearKernel], # AiterInt8ScaledMMLinearKernel
} }
# in priority/performance order (when available) # in priority/performance order (when available)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment