"vllm/vscode:/vscode.git/clone" did not exist on "5afd3276dfd70397a08ba16ee8eb246ddb3d13ef"
Unverified commit 0790f076, authored by BlankR, committed by GitHub
Browse files

[Misc] Improve error messages for unsupported types and parameters (#30593)


Signed-off-by: BlankR <hjyblanche@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
parent 1f33e38e
...@@ -343,7 +343,9 @@ def bench( ...@@ -343,7 +343,9 @@ def bench(
return bench_int8(dtype, m, k, n, label, sub_label) return bench_int8(dtype, m, k, n, label, sub_label)
if dtype == torch.float8_e4m3fn: if dtype == torch.float8_e4m3fn:
return bench_fp8(dtype, m, k, n, label, sub_label) return bench_fp8(dtype, m, k, n, label, sub_label)
raise ValueError("unsupported type") raise ValueError(
f"Unsupported dtype {dtype}: should be one of torch.int8, torch.float8_e4m3fn."
)
# runner # runner
......
...@@ -292,7 +292,10 @@ def chunked_prefill_paged_decode( ...@@ -292,7 +292,10 @@ def chunked_prefill_paged_decode(
elif kv_cache_dtype == "fp8_e5m2": elif kv_cache_dtype == "fp8_e5m2":
target_dtype = torch.float8_e5m2 target_dtype = torch.float8_e5m2
else: else:
raise ValueError("Unsupported FP8 dtype:", kv_cache_dtype) raise ValueError(
f"Unsupported FP8 kv_cache_dtype {kv_cache_dtype}: "
f"should be one of 'fp8', 'fp8_e4m3', 'fp8_e5m2'."
)
key_cache = key_cache.view(target_dtype) key_cache = key_cache.view(target_dtype)
value_cache = value_cache.view(target_dtype) value_cache = value_cache.view(target_dtype)
......
...@@ -90,7 +90,7 @@ class LoRAConfig: ...@@ -90,7 +90,7 @@ class LoRAConfig:
elif self.max_cpu_loras < self.max_loras: elif self.max_cpu_loras < self.max_loras:
raise ValueError( raise ValueError(
f"max_cpu_loras ({self.max_cpu_loras}) must be >= " f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
f"max_loras ({self.max_loras})" f"max_loras ({self.max_loras})."
) )
return self return self
......
...@@ -92,7 +92,10 @@ class ncclDataTypeEnum: ...@@ -92,7 +92,10 @@ class ncclDataTypeEnum:
return cls.ncclFloat64 return cls.ncclFloat64
if dtype == torch.bfloat16: if dtype == torch.bfloat16:
return cls.ncclBfloat16 return cls.ncclBfloat16
raise ValueError(f"Unsupported dtype: {dtype}") raise ValueError(
f"Unsupported dtype {dtype}: should be one of "
f"int8, uint8, int32, int64, float16, float32, float64, bfloat16."
)
ncclRedOp_t = ctypes.c_int ncclRedOp_t = ctypes.c_int
......
...@@ -233,7 +233,10 @@ class RequestTracker: ...@@ -233,7 +233,10 @@ class RequestTracker:
elif isinstance(new_block_ids, list): elif isinstance(new_block_ids, list):
pass pass
else: else:
raise ValueError(f"Unsupported new_block_ids type {type(new_block_ids)}") raise ValueError(
f"Unsupported new_block_ids type {type(new_block_ids)}: "
f"should be None[list[int], ...], tuple or list[int]."
)
self.allocated_block_ids.extend(new_block_ids) self.allocated_block_ids.extend(new_block_ids)
# When a request is scheduled again, and the number of new tokens # When a request is scheduled again, and the number of new tokens
......
...@@ -56,22 +56,22 @@ class AutoRoundConfig(QuantizationConfig): ...@@ -56,22 +56,22 @@ class AutoRoundConfig(QuantizationConfig):
if weight_bits not in self.SUPPORTED_BITS: if weight_bits not in self.SUPPORTED_BITS:
raise ValueError( raise ValueError(
f"Unsupported weight_bits: {weight_bits}, " f"Unsupported weight_bits: {weight_bits}, "
f"currently only support {self.SUPPORTED_BITS}" f"currently only support {self.SUPPORTED_BITS}."
) )
if data_type not in self.SUPPORTED_DTYPES: if data_type not in self.SUPPORTED_DTYPES:
raise ValueError( raise ValueError(
f"Unsupported data_type: {data_type}," f"Unsupported data_type: {data_type}, "
f" currently only support {self.SUPPORTED_DTYPES}" f"currently only support {self.SUPPORTED_DTYPES}."
) )
if packing_format not in self.SUPPORTED_FORMATS: if packing_format not in self.SUPPORTED_FORMATS:
raise ValueError( raise ValueError(
f"Unsupported packing_format: {packing_format}, " f"Unsupported packing_format: {packing_format}, "
f"currently only support {self.SUPPORTED_FORMATS}" f"currently only support {self.SUPPORTED_FORMATS}."
) )
if backend not in self.SUPPORTED_BACKENDS: if backend not in self.SUPPORTED_BACKENDS:
raise ValueError( raise ValueError(
f"Unsupported backend: {backend}, " f"Unsupported backend: {backend}, "
f"currently only support {self.SUPPORTED_BACKENDS}" f"currently only support {self.SUPPORTED_BACKENDS}."
) )
self.weight_bits = weight_bits self.weight_bits = weight_bits
......
...@@ -158,7 +158,10 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme): ...@@ -158,7 +158,10 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
input_scale = None input_scale = None
else: else:
raise ValueError(f"Unknown quantization strategy {self.strategy}") raise ValueError(
f"Unknown quantization strategy {self.strategy}: "
f"should be one of {list(QuantizationStrategy)}"
)
# required by torch.compile to be torch.nn.Parameter # required by torch.compile to be torch.nn.Parameter
layer.weight = Parameter(weight.data, requires_grad=False) layer.weight = Parameter(weight.data, requires_grad=False)
......
...@@ -783,7 +783,10 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ...@@ -783,7 +783,10 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
layer.w13_weight = w13_weight layer.w13_weight = w13_weight
layer.w2_weight = w2_weight layer.w2_weight = w2_weight
else: else:
raise ValueError(f"Unsupported backend: {self.mxfp4_backend}") raise ValueError(
f"Unsupported mxfp4_backend: {self.mxfp4_backend}: "
f"should be one of: {list(Mxfp4Backend)}."
)
def get_fused_moe_quant_config( def get_fused_moe_quant_config(
self, layer: torch.nn.Module self, layer: torch.nn.Module
......
...@@ -599,7 +599,11 @@ def smart_resize( ...@@ -599,7 +599,11 @@ def smart_resize(
w_bar = ceil_by_factor(width * beta, factor) w_bar = ceil_by_factor(width * beta, factor)
if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels: if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels:
raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}") raise ValueError(
f"Invalid h_bar={h_bar}, w_bar={w_bar}: "
f"h_bar * w_bar must be >= min_pixels ({min_pixels}) "
f"and <= max_pixels ({max_pixels})."
)
return h_bar, w_bar return h_bar, w_bar
......
...@@ -348,7 +348,9 @@ class GraniteSpeechConformerAttention(nn.Module): ...@@ -348,7 +348,9 @@ class GraniteSpeechConformerAttention(nn.Module):
if self.context_size <= 0 or self.context_size > self.max_pos_emb: if self.context_size <= 0 or self.context_size > self.max_pos_emb:
raise ValueError( raise ValueError(
"Context size is either less than 0 or exceeds the max_pos_emb" f"Context size should be > 0 and "
f"<= max_pos_emb ({self.max_pos_emb}), "
f"got {self.context_size}."
) )
def forward( def forward(
......
...@@ -332,7 +332,8 @@ class MiniMaxText01DecoderLayer(nn.Module): ...@@ -332,7 +332,8 @@ class MiniMaxText01DecoderLayer(nn.Module):
) )
else: else:
raise ValueError( raise ValueError(
f"Unsupported attention type: {self.config.attention_type}" f"Unsupported attention_type {self.config.attention_type}: "
f"should be 0 (linear) or 1 (full)."
) )
if expert_num == 1: if expert_num == 1:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment