Unverified commit 0790f076, authored by BlankR and committed by GitHub
Browse files

[Misc] Improve error messages for unsupported types and parameters (#30593)


Signed-off-by: BlankR <hjyblanche@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
parent 1f33e38e
......@@ -343,7 +343,9 @@ def bench(
return bench_int8(dtype, m, k, n, label, sub_label)
if dtype == torch.float8_e4m3fn:
return bench_fp8(dtype, m, k, n, label, sub_label)
raise ValueError("unsupported type")
raise ValueError(
f"Unsupported dtype {dtype}: should be one of torch.int8, torch.float8_e4m3fn."
)
# runner
......
......@@ -292,7 +292,10 @@ def chunked_prefill_paged_decode(
elif kv_cache_dtype == "fp8_e5m2":
target_dtype = torch.float8_e5m2
else:
raise ValueError("Unsupported FP8 dtype:", kv_cache_dtype)
raise ValueError(
f"Unsupported FP8 kv_cache_dtype {kv_cache_dtype}: "
f"should be one of 'fp8', 'fp8_e4m3', 'fp8_e5m2'."
)
key_cache = key_cache.view(target_dtype)
value_cache = value_cache.view(target_dtype)
......
......@@ -90,7 +90,7 @@ class LoRAConfig:
elif self.max_cpu_loras < self.max_loras:
raise ValueError(
f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
f"max_loras ({self.max_loras})"
f"max_loras ({self.max_loras})."
)
return self
......
......@@ -92,7 +92,10 @@ class ncclDataTypeEnum:
return cls.ncclFloat64
if dtype == torch.bfloat16:
return cls.ncclBfloat16
raise ValueError(f"Unsupported dtype: {dtype}")
raise ValueError(
f"Unsupported dtype {dtype}: should be one of "
f"int8, uint8, int32, int64, float16, float32, float64, bfloat16."
)
ncclRedOp_t = ctypes.c_int
......
......@@ -233,7 +233,10 @@ class RequestTracker:
elif isinstance(new_block_ids, list):
pass
else:
raise ValueError(f"Unsupported new_block_ids type {type(new_block_ids)}")
raise ValueError(
f"Unsupported new_block_ids type {type(new_block_ids)}: "
f"should be None[list[int], ...], tuple or list[int]."
)
self.allocated_block_ids.extend(new_block_ids)
# When a request is scheduled again, and the number of new tokens
......
......@@ -56,22 +56,22 @@ class AutoRoundConfig(QuantizationConfig):
if weight_bits not in self.SUPPORTED_BITS:
raise ValueError(
f"Unsupported weight_bits: {weight_bits}, "
f"currently only support {self.SUPPORTED_BITS}"
f"currently only support {self.SUPPORTED_BITS}."
)
if data_type not in self.SUPPORTED_DTYPES:
raise ValueError(
f"Unsupported data_type: {data_type},"
f" currently only support {self.SUPPORTED_DTYPES}"
f"Unsupported data_type: {data_type}, "
f"currently only support {self.SUPPORTED_DTYPES}."
)
if packing_format not in self.SUPPORTED_FORMATS:
raise ValueError(
f"Unsupported packing_format: {packing_format}, "
f"currently only support {self.SUPPORTED_FORMATS}"
f"currently only support {self.SUPPORTED_FORMATS}."
)
if backend not in self.SUPPORTED_BACKENDS:
raise ValueError(
f"Unsupported backend: {backend}, "
f"currently only support {self.SUPPORTED_BACKENDS}"
f"Unsupported backend: {backend}, "
f"currently only support {self.SUPPORTED_BACKENDS}."
)
self.weight_bits = weight_bits
......
......@@ -158,7 +158,10 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
input_scale = None
else:
raise ValueError(f"Unknown quantization strategy {self.strategy}")
raise ValueError(
f"Unknown quantization strategy {self.strategy}: "
f"should be one of {list(QuantizationStrategy)}"
)
# required by torch.compile to be torch.nn.Parameter
layer.weight = Parameter(weight.data, requires_grad=False)
......
......@@ -783,7 +783,10 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
layer.w13_weight = w13_weight
layer.w2_weight = w2_weight
else:
raise ValueError(f"Unsupported backend: {self.mxfp4_backend}")
raise ValueError(
f"Unsupported mxfp4_backend: {self.mxfp4_backend}: "
f"should be one of: {list(Mxfp4Backend)}."
)
def get_fused_moe_quant_config(
self, layer: torch.nn.Module
......
......@@ -599,7 +599,11 @@ def smart_resize(
w_bar = ceil_by_factor(width * beta, factor)
if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels:
raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}")
raise ValueError(
f"Invalid h_bar={h_bar}, w_bar={w_bar}: "
f"h_bar * w_bar must be >= min_pixels ({min_pixels}) "
f"and <= max_pixels ({max_pixels})."
)
return h_bar, w_bar
......
......@@ -348,7 +348,9 @@ class GraniteSpeechConformerAttention(nn.Module):
if self.context_size <= 0 or self.context_size > self.max_pos_emb:
raise ValueError(
"Context size is either less than 0 or exceeds the max_pos_emb"
f"Context size should be > 0 and "
f"<= max_pos_emb ({self.max_pos_emb}), "
f"got {self.context_size}."
)
def forward(
......
......@@ -332,7 +332,8 @@ class MiniMaxText01DecoderLayer(nn.Module):
)
else:
raise ValueError(
f"Unsupported attention type: {self.config.attention_type}"
f"Unsupported attention_type {self.config.attention_type}: "
f"should be 0 (linear) or 1 (full)."
)
if expert_num == 1:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment