[Bugfix] Make torch registration of punica ops optional (#7970)

3cdfe1f3 · bnellnm · GitHub · fdd9daaf · 3cdfe1f3 · 3cdfe1f3
Unverified commit 3cdfe1f3, authored Aug 28, 2024 by bnellnm; committed by GitHub on Aug 28, 2024.
7 changed files
--- a/vllm/lora/ops/bgmv_expand.py
+++ b/vllm/lora/ops/bgmv_expand.py
@@ -160,6 +160,9 @@ def _bgmv_expand(
    return
-bgmv_expand = torch.library.custom_op("lora::bgmv_expand",
+try:
+    bgmv_expand = torch.library.custom_op("lora::bgmv_expand",
                                          _bgmv_expand,
                                          mutates_args=["output_tensor"])
+except AttributeError:
+    bgmv_expand = _bgmv_expand
--- a/vllm/lora/ops/bgmv_expand_slice.py
+++ b/vllm/lora/ops/bgmv_expand_slice.py
@@ -173,6 +173,9 @@ def _bgmv_expand_slice(
    return
-bgmv_expand_slice = torch.library.custom_op("lora::bgmv_expand_slice",
+try:
+    bgmv_expand_slice = torch.library.custom_op("lora::bgmv_expand_slice",
                                                _bgmv_expand_slice,
                                                mutates_args=["output_tensor"])
+except AttributeError:
+    bgmv_expand_slice = _bgmv_expand_slice
--- a/vllm/lora/ops/bgmv_shrink.py
+++ b/vllm/lora/ops/bgmv_shrink.py
@@ -142,6 +142,9 @@ def _bgmv_shrink(
    return
-bgmv_shrink = torch.library.custom_op("lora::bgmv_shrink",
+try:
+    bgmv_shrink = torch.library.custom_op("lora::bgmv_shrink",
                                          _bgmv_shrink,
                                          mutates_args=["output_tensor"])
+except AttributeError:
+    bgmv_shrink = _bgmv_shrink
--- a/vllm/lora/ops/sgmv_expand.py
+++ b/vllm/lora/ops/sgmv_expand.py
@@ -192,6 +192,9 @@ def _sgmv_expand(
    return
-sgmv_expand = torch.library.custom_op("lora::sgmv_expand",
+try:
+    sgmv_expand = torch.library.custom_op("lora::sgmv_expand",
                                          _sgmv_expand,
                                          mutates_args=["output_tensor"])
+except AttributeError:
+    sgmv_expand = _sgmv_expand
--- a/vllm/lora/ops/sgmv_expand_slice.py
+++ b/vllm/lora/ops/sgmv_expand_slice.py
@@ -205,6 +205,9 @@ def _sgmv_expand_slice(
    return
-sgmv_expand_slice = torch.library.custom_op("lora::sgmv_expand_slice",
+try:
+    sgmv_expand_slice = torch.library.custom_op("lora::sgmv_expand_slice",
                                                _sgmv_expand_slice,
                                                mutates_args=["output_tensor"])
+except AttributeError:
+    sgmv_expand_slice = _sgmv_expand_slice
--- a/vllm/lora/ops/sgmv_shrink.py
+++ b/vllm/lora/ops/sgmv_shrink.py
@@ -189,6 +189,9 @@ def _sgmv_shrink(
    return
-sgmv_shrink = torch.library.custom_op("lora::sgmv_shrink",
+try:
+    sgmv_shrink = torch.library.custom_op("lora::sgmv_shrink",
                                          _sgmv_shrink,
                                          mutates_args=["output_tensor"])
+except AttributeError:
+    sgmv_shrink = _sgmv_shrink
--- a/vllm/lora/punica.py
+++ b/vllm/lora/punica.py
@@ -10,10 +10,8 @@ from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
 import torch
 from vllm.triton_utils import HAS_TRITON
-from vllm.utils import is_xpu
-# FIXME: xpu path doesn't support torch.library.custom_op
+if HAS_TRITON:
-if HAS_TRITON and not is_xpu():
    from vllm.lora.ops.bgmv_expand import bgmv_expand
    from vllm.lora.ops.bgmv_expand_slice import bgmv_expand_slice
    from vllm.lora.ops.bgmv_shrink import bgmv_shrink