"vllm/model_executor/models/step1.py" did not exist on "21063c11c7d340dbb01460e22d98d3619737cd4d"
Unverified Commit f7dac83d, authored by Cody Yu, committed by GitHub
Browse files

[Kernel] Raise an exception in MoE kernel if the batch size is larger than 65k (#5939)

parent 7c01f706
......@@ -423,6 +423,11 @@ def fused_experts(hidden_states: torch.Tensor,
M, _ = hidden_states.shape
E, N, _ = w1.shape
if M > 65536:
# https://github.com/vllm-project/vllm/issues/5938
raise ValueError("MoE kernel does not support more than 65536 tokens, "
f"but got {M}")
if override_config:
config = override_config
else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment