Use N1 instead of N when computing K in BlockInt8MoEMethod

8c7075d1 · zhuwenwen · 1d36bb49 · 8c7075d1
Commit 8c7075d1 authored Aug 01, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

vllm/model_executor/layers/quantization/blockwise_int8.py +1 -1

No files found.
--- a/vllm/model_executor/layers/quantization/blockwise_int8.py
+++ b/vllm/model_executor/layers/quantization/blockwise_int8.py
@@ -432,7 +432,7 @@ class BlockInt8MoEMethod:
        E=layer.w13_weight.shape[0]
        N1=layer.w13_weight.shape[1]
        N2=layer.w2_weight.shape[1]
-        K=N//2
+        K=N1//2
        if [E,N1,N2,K] not in self.tritonsingleton.moe_weight_shapes:
            self.tritonsingleton.moe_weight_shapes.append([E,N1,N2,K])