"vscode:/vscode.git/clone" did not exist on "b880ffb87e0bcde5e3693203b480df49e46d67bc"
Unverified commit 48bfb0c9, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Fix R1 Accuracy 0 Bug (#23294)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
parent f8ce0229
...@@ -1099,8 +1099,7 @@ class Fp8MoEMethod(FusedMoEMethodBase): ...@@ -1099,8 +1099,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
apply_router_weight_on_input=apply_router_weight_on_input, apply_router_weight_on_input=apply_router_weight_on_input,
) )
else: else:
from vllm.model_executor.layers.fused_moe import fused_experts common_kwargs = dict(
return fused_experts(
hidden_states=x, hidden_states=x,
w1=layer.w13_weight, w1=layer.w13_weight,
w2=layer.w2_weight, w2=layer.w2_weight,
...@@ -1117,11 +1116,20 @@ class Fp8MoEMethod(FusedMoEMethodBase): ...@@ -1117,11 +1116,20 @@ class Fp8MoEMethod(FusedMoEMethodBase):
if self.block_quant else layer.w2_weight_scale), if self.block_quant else layer.w2_weight_scale),
a1_scale=layer.w13_input_scale, a1_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale, a2_scale=layer.w2_input_scale,
)
if self.fused_experts is not None:
return self.fused_experts(**common_kwargs)
else:
from vllm.model_executor.layers.fused_moe import fused_experts
return fused_experts(
**common_kwargs,
use_fp8_w8a8=True, use_fp8_w8a8=True,
block_shape=self.quant_config.weight_block_size, block_shape=self.quant_config.weight_block_size,
allow_deep_gemm=self.allow_deep_gemm, allow_deep_gemm=self.allow_deep_gemm,
allow_cutlass_block_scaled_grouped_gemm=( allow_cutlass_block_scaled_grouped_gemm=(
self.allow_cutlass_block_scaled_grouped_gemm)) self.allow_cutlass_block_scaled_grouped_gemm),
)
class Fp8KVCacheMethod(BaseKVCacheMethod): class Fp8KVCacheMethod(BaseKVCacheMethod):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment