[platform] support pytorch custom op pluggable (#11328)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>

[platform] support pytorch custom op pluggable (#11328)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
ef725fea · wangxiyuan · GitHub · d907be7d · ef725fea · ef725fea
Unverified Commit ef725fea authored Jan 10, 2025 by wangxiyuan Committed by GitHub Jan 10, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 11 additions and 0 deletions

vllm/model_executor/custom_op.py vllm/model_executor/custom_op.py +7 -0

vllm/platforms/interface.py vllm/platforms/interface.py +4 -0

No files found.
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@@ -57,6 +57,11 @@ class CustomOp(nn.Module):
        # PyTorch-native implementation.
        return self.forward_native(*args, **kwargs)

+    def forward_oot(self, *args, **kwargs):
+        # By default, assume out-of-tree (OOT) platform ops are compatible
+        # with the PyTorch-native implementation and fall back to it.
+        return self.forward_native(*args, **kwargs)
+
    def dispatch_forward(self):
        # NOTE(woosuk): Here we assume that vLLM was built for only one
        # specific backend. Currently, we do not support dynamic dispatching.
@@ -81,6 +86,8 @@ class CustomOp(nn.Module):
            return self.forward_tpu
        elif current_platform.is_xpu():
            return self.forward_xpu
+        elif current_platform.is_out_of_tree():
+            return self.forward_oot
        else:
            return self.forward_cuda


--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -45,6 +45,7 @@ class PlatformEnum(enum.Enum):
    CPU = enum.auto()
    NEURON = enum.auto()
    OPENVINO = enum.auto()
+    OOT = enum.auto()
    UNSPECIFIED = enum.auto()


@@ -107,6 +108,9 @@ class Platform:
    def is_openvino(self) -> bool:
        return self._enum == PlatformEnum.OPENVINO

+    def is_out_of_tree(self) -> bool:
+        return self._enum == PlatformEnum.OOT  # OOT = out-of-tree platform
+
    def is_cuda_alike(self) -> bool:
        """Stateless version of :func:`torch.cuda.is_available`."""
        return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)