add VLLM_USE_OPT_ZEROS to replace triton_ (torch.zeros)
set default_max_num_batched_tokens = 10240; update qwen3_moe of layernorm; off lightop of moe_fused_gate
Showing
Please register or sign in to comment
set default_max_num_batched_tokens = 10240; update qwen3_moe of layernorm; off lightop of moe_fused_gate