".github/actions/vscode:/vscode.git/clone" did not exist on "ba2d9684bfb7132c3608ea77736d99a8d44b1acd"
Unverified commit 6b487224, authored by Qiming Zhang, committed by GitHub
Browse files

[XPU] bump up xpu-kernel v0.1.5, transpose moe weights (#38342)


Signed-off-by: mayuyuace <qiming1.zhang@intel.com>
Signed-off-by: Qiming Zhang <qiming1.zhang@intel.com>
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
parent 580090db
@@ -15,4 +15,4 @@ torch==2.10.0+xpu
 torchaudio
 torchvision
-vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.4/vllm_xpu_kernels-0.1.4-cp38-abi3-manylinux_2_28_x86_64.whl
+vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.5/vllm_xpu_kernels-0.1.5-cp38-abi3-manylinux_2_28_x86_64.whl
...@@ -222,6 +222,18 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): ...@@ -222,6 +222,18 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer) self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
else: else:
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer) self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
elif current_platform.is_xpu():
w13 = layer.w13_weight
w2 = layer.w2_weight
w13.data = w13.transpose(-1, -2).contiguous()
w2.data = w2.transpose(-1, -2).contiguous()
self._setup_kernel(
layer=layer,
w13=w13,
w2=w2,
)
else: else:
self._setup_kernel( self._setup_kernel(
layer=layer, layer=layer,
......
...@@ -1028,6 +1028,10 @@ class Fp8OnlineMoEMethod(Fp8MoEMethod): ...@@ -1028,6 +1028,10 @@ class Fp8OnlineMoEMethod(Fp8MoEMethod):
layer.w2_weight[expert, :, :] layer.w2_weight[expert, :, :]
) )
if current_platform.is_xpu():
w13.data = w13.transpose(-1, -2).contiguous()
w2.data = w2.transpose(-1, -2).contiguous()
# Shuffle weights to runtime format and setup kernel. # Shuffle weights to runtime format and setup kernel.
self._setup_kernel( self._setup_kernel(
layer, layer,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment