"vscode:/vscode.git/clone" did not exist on "aed161e63f02233b56aaff8ef54c670d71f938da"
Unverified commit 12f2e6c3, authored by Xihuai Wang and committed by GitHub
Browse files

Fix: #3988 using blockwise_int8 (#4023)

parent 95575aa7
...@@ -371,6 +371,8 @@ class BlockInt8MoEMethod: ...@@ -371,6 +371,8 @@ class BlockInt8MoEMethod:
custom_routing_function: Optional[Callable] = None, custom_routing_function: Optional[Callable] = None,
correction_bias: Optional[torch.Tensor] = None, correction_bias: Optional[torch.Tensor] = None,
activation: str = "silu", activation: str = "silu",
inplace: bool = True,
no_combine: bool = False,
) -> torch.Tensor: ) -> torch.Tensor:
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
from sglang.srt.layers.moe.topk import select_experts from sglang.srt.layers.moe.topk import select_experts
...@@ -395,7 +397,7 @@ class BlockInt8MoEMethod: ...@@ -395,7 +397,7 @@ class BlockInt8MoEMethod:
layer.w2_weight, layer.w2_weight,
topk_weights=topk_weights, topk_weights=topk_weights,
topk_ids=topk_ids, topk_ids=topk_ids,
inplace=True, inplace=inplace,
activation=activation, activation=activation,
use_int8_w8a8=True, use_int8_w8a8=True,
w1_scale=(layer.w13_weight_scale_inv), w1_scale=(layer.w13_weight_scale_inv),
...@@ -403,4 +405,5 @@ class BlockInt8MoEMethod: ...@@ -403,4 +405,5 @@ class BlockInt8MoEMethod:
a1_scale=layer.w13_input_scale, a1_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale, a2_scale=layer.w2_input_scale,
block_shape=self.quant_config.weight_block_size, block_shape=self.quant_config.weight_block_size,
no_combine=no_combine,
) )
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment