[Misc] Improve error messages for unsupported types and parameters (#30593)

Signed-off-by: BlankR <hjyblanche@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>

[Misc] Improve error messages for unsupported types and parameters (#30593)
Signed-off-by: BlankR <hjyblanche@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
0790f076 · BlankR · GitHub · 1f33e38e · 0790f076 · 0790f076
Unverified Commit 0790f076 authored Jan 07, 2026 by BlankR Committed by GitHub Jan 07, 2026
11 changed files
--- a/benchmarks/cutlass_benchmarks/sparse_benchmarks.py
+++ b/benchmarks/cutlass_benchmarks/sparse_benchmarks.py
@@ -343,7 +343,9 @@ def bench(
        return bench_int8(dtype, m, k, n, label, sub_label)
    if dtype == torch.float8_e4m3fn:
        return bench_fp8(dtype, m, k, n, label, sub_label)
-    raise ValueError("unsupported type")
+    raise ValueError(
+        f"Unsupported dtype {dtype}: should be one of torch.int8, torch.float8_e4m3fn."
+    )


 # runner

--- a/vllm/attention/ops/chunked_prefill_paged_decode.py
+++ b/vllm/attention/ops/chunked_prefill_paged_decode.py
@@ -292,7 +292,10 @@ def chunked_prefill_paged_decode(
        elif kv_cache_dtype == "fp8_e5m2":
            target_dtype = torch.float8_e5m2
        else:
-            raise ValueError("Unsupported FP8 dtype:", kv_cache_dtype)
+            raise ValueError(
+                f"Unsupported FP8 kv_cache_dtype {kv_cache_dtype}: "
+                f"should be one of 'fp8', 'fp8_e4m3', 'fp8_e5m2'."
+            )

        key_cache = key_cache.view(target_dtype)
        value_cache = value_cache.view(target_dtype)

--- a/vllm/config/lora.py
+++ b/vllm/config/lora.py
@@ -90,7 +90,7 @@ class LoRAConfig:
        elif self.max_cpu_loras < self.max_loras:
            raise ValueError(
                f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
-                f"max_loras ({self.max_loras})"
+                f"max_loras ({self.max_loras})."
            )

        return self

--- a/vllm/distributed/device_communicators/pynccl_wrapper.py
+++ b/vllm/distributed/device_communicators/pynccl_wrapper.py
@@ -92,7 +92,10 @@ class ncclDataTypeEnum:
            return cls.ncclFloat64
        if dtype == torch.bfloat16:
            return cls.ncclBfloat16
-        raise ValueError(f"Unsupported dtype: {dtype}")
+        raise ValueError(
+            f"Unsupported dtype {dtype}: should be one of "
+            f"int8, uint8, int32, int64, float16, float32, float64, bfloat16."
+        )


 ncclRedOp_t = ctypes.c_int

--- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/vllm_v1_adapter.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/vllm_v1_adapter.py
@@ -233,7 +233,10 @@ class RequestTracker:
        elif isinstance(new_block_ids, list):
            pass
        else:
-            raise ValueError(f"Unsupported new_block_ids type {type(new_block_ids)}")
+            raise ValueError(
+                f"Unsupported new_block_ids type {type(new_block_ids)}: "
+                f"should be None[list[int], ...], tuple or list[int]."
+            )
        self.allocated_block_ids.extend(new_block_ids)

        # When a request is scheduled again, and the number of new tokens

--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -56,22 +56,22 @@ class AutoRoundConfig(QuantizationConfig):
        if weight_bits not in self.SUPPORTED_BITS:
            raise ValueError(
                f"Unsupported weight_bits: {weight_bits}, "
-                f"currently only support  {self.SUPPORTED_BITS}"
+                f"currently only support {self.SUPPORTED_BITS}."
            )
        if data_type not in self.SUPPORTED_DTYPES:
            raise ValueError(
-                f"Unsupported data_type: {data_type},"
-                f" currently only support  {self.SUPPORTED_DTYPES}"
+                f"Unsupported data_type: {data_type}, "
+                f"currently only support {self.SUPPORTED_DTYPES}."
            )
        if packing_format not in self.SUPPORTED_FORMATS:
            raise ValueError(
                f"Unsupported packing_format: {packing_format}, "
-                f"currently only support  {self.SUPPORTED_FORMATS}"
+                f"currently only support {self.SUPPORTED_FORMATS}."
            )
        if backend not in self.SUPPORTED_BACKENDS:
            raise ValueError(
-                f"Unsupported backend: {backend},  "
-                f"currently only support  {self.SUPPORTED_BACKENDS}"
+                f"Unsupported backend: {backend}, "
+                f"currently only support {self.SUPPORTED_BACKENDS}."
            )

        self.weight_bits = weight_bits

--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
@@ -158,7 +158,10 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
            input_scale = None

        else:
-            raise ValueError(f"Unknown quantization strategy {self.strategy}")
+            raise ValueError(
+                f"Unknown quantization strategy {self.strategy}: "
+                f"should be one of {list(QuantizationStrategy)}"
+            )

        # required by torch.compile to be torch.nn.Parameter
        layer.weight = Parameter(weight.data, requires_grad=False)

--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -783,7 +783,10 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
            layer.w13_weight = w13_weight
            layer.w2_weight = w2_weight
        else:
-            raise ValueError(f"Unsupported backend: {self.mxfp4_backend}")
+            raise ValueError(
+                f"Unsupported mxfp4_backend: {self.mxfp4_backend}: "
+                f"should be one of: {list(Mxfp4Backend)}."
+            )

    def get_fused_moe_quant_config(
        self, layer: torch.nn.Module

--- a/vllm/model_executor/models/ernie45_vl.py
+++ b/vllm/model_executor/models/ernie45_vl.py
@@ -599,7 +599,11 @@ def smart_resize(
        w_bar = ceil_by_factor(width * beta, factor)

    if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels:
-        raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}")
+        raise ValueError(
+            f"Invalid h_bar={h_bar}, w_bar={w_bar}: "
+            f"h_bar * w_bar must be >= min_pixels ({min_pixels}) "
+            f"and <= max_pixels ({max_pixels})."
+        )

    return h_bar, w_bar


--- a/vllm/model_executor/models/granite_speech.py
+++ b/vllm/model_executor/models/granite_speech.py
@@ -348,7 +348,9 @@ class GraniteSpeechConformerAttention(nn.Module):

        if self.context_size <= 0 or self.context_size > self.max_pos_emb:
            raise ValueError(
-                "Context size is either less than 0 or exceeds the max_pos_emb"
+                f"Context size should be > 0 and "
+                f"<= max_pos_emb ({self.max_pos_emb}), "
+                f"got {self.context_size}."
            )

    def forward(

--- a/vllm/model_executor/models/minimax_text_01.py
+++ b/vllm/model_executor/models/minimax_text_01.py
@@ -332,7 +332,8 @@ class MiniMaxText01DecoderLayer(nn.Module):
            )
        else:
            raise ValueError(
-                f"Unsupported attention type: {self.config.attention_type}"
+                f"Unsupported attention_type {self.config.attention_type}: "
+                f"should be 0 (linear) or 1 (full)."
            )

        if expert_num == 1: