Commit e7e5a305 (unverified), authored by Baizhou Zhang, committed by GitHub
Browse files

Update batch size limitation of dsv3_router_gemm kernel to 16 (#8051)

parent dd7ca006
@@ -252,8 +252,7 @@ class MoEGate(nn.Module):
     # NOTE: For some unknown reason, router_gemm seems degrade accept length.
     if (
         _is_cuda
-        and not self.is_nextn
-        and hidden_states.shape[0] < 4
+        and hidden_states.shape[0] <= 16
         and hidden_states.shape[1] == 7168
         and self.weight.shape[0] == 256
         and _device_sm >= 90
 ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment