Unverified commit dbbe0c75, authored by Fanli Lin and committed by GitHub
Browse files

[XPU] Support Triton path for LoRA operations on XPU (#28511)


Signed-off-by: Fanli Lin <fanli.lin@intel.com>
parent 7dca0c90
......@@ -48,6 +48,7 @@ def _lora_expand_kernel(
SLICE_NUM: tl.constexpr,
SAME_STRIDE: tl.constexpr,
USE_GDC: tl.constexpr,
launch_pdl: tl.constexpr,
):
cta_n_num = tl.cdiv(N, BLOCK_N)
cta_m_num = tl.cdiv(M, BLOCK_M)
......
......@@ -46,6 +46,7 @@ def _lora_shrink_kernel(
GROUP_SIZE_M: tl.constexpr,
SLICE_NUM: tl.constexpr,
USE_GDC: tl.constexpr,
launch_pdl: tl.constexpr,
):
cta_n_num = tl.cdiv(N, BLOCK_N)
cta_m_num = tl.cdiv(M, BLOCK_M)
......
......@@ -101,7 +101,11 @@ class XPUPlatform(Platform):
@classmethod
def get_punica_wrapper(cls) -> str:
    """Return the dotted import path of the Punica (LoRA) wrapper class.

    The choice is controlled by the ``XPU_USE_TRITON_KERNEL`` environment
    variable, which is read on every call:

    * exactly ``"1"`` -> ``PunicaWrapperGPU``
    * any other value, or unset (defaults to ``"0"``) -> ``PunicaWrapperXPU``

    Returns:
        The fully qualified class path as a string.
    """
    # Opt-in switch: only the literal string "1" enables the alternative
    # wrapper; values such as "true" or "yes" keep the XPU default.
    if os.getenv("XPU_USE_TRITON_KERNEL", "0") == "1":
        return "vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU"
    return "vllm.lora.punica_wrapper.punica_xpu.PunicaWrapperXPU"
@classmethod
def get_device_total_memory(cls, device_id: int = 0) -> int:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment