Commit 6b487224 (unverified), authored by Qiming Zhang, committed by GitHub
Browse files

[XPU] bump up xpu-kernel v0.1.5, transpose moe weights (#38342)


Signed-off-by: mayuyuace <qiming1.zhang@intel.com>
Signed-off-by: Qiming Zhang <qiming1.zhang@intel.com>
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
parent 580090db
......@@ -15,4 +15,4 @@ torch==2.10.0+xpu
torchaudio
torchvision
vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.4/vllm_xpu_kernels-0.1.4-cp38-abi3-manylinux_2_28_x86_64.whl
vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.5/vllm_xpu_kernels-0.1.5-cp38-abi3-manylinux_2_28_x86_64.whl
......@@ -222,6 +222,18 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
else:
self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
elif current_platform.is_xpu():
w13 = layer.w13_weight
w2 = layer.w2_weight
w13.data = w13.transpose(-1, -2).contiguous()
w2.data = w2.transpose(-1, -2).contiguous()
self._setup_kernel(
layer=layer,
w13=w13,
w2=w2,
)
else:
self._setup_kernel(
layer=layer,
......
......@@ -1028,6 +1028,10 @@ class Fp8OnlineMoEMethod(Fp8MoEMethod):
layer.w2_weight[expert, :, :]
)
if current_platform.is_xpu():
w13.data = w13.transpose(-1, -2).contiguous()
w2.data = w2.transpose(-1, -2).contiguous()
# Shuffle weights to runtime format and setup kernel.
self._setup_kernel(
layer,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment