Unverified Commit 1144a8ef authored by NaLan ZeYu, committed by GitHub
Browse files

[Bugfix] Temporarily disable gptq_bitblas on ROCm (#17411)


Signed-off-by: Yan Cangang <nalanzeyu@gmail.com>
parent 08fb5587
......@@ -80,7 +80,7 @@ The table below shows the compatibility of various quantization implementations
* ✅︎
* ✅︎
* ✅︎
* ✅︎
*
*
*
*
......
......@@ -25,6 +25,7 @@ from vllm.model_executor.parameter import (ChannelQuantScaleParameter,
PackedColumnParameter,
PackedvLLMParameter,
RowvLLMParameter)
from vllm.platforms import current_platform
from vllm.scalar_type import scalar_types
logger = init_logger(__name__)
......@@ -191,6 +192,10 @@ class GPTQBitBLASConfig(QuantizationConfig):
sym = quant_config.get("sym")
desc_act = quant_config.get("desc_act")
# Temporarily disabled on ROCm; note the check below rejects every non-CUDA platform.
if not current_platform.is_cuda():
return False
# If we cannot find the info needed in the config, cannot convert.
if (num_bits is None or group_size is None or sym is None
or desc_act is None):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment