Unverified Commit d4bf085a authored by Kunshang Ji, committed by GitHub
Browse files

[MISC] add support custom_op check (#8557)


Co-authored-by: youkaichao <youkaichao@126.com>
parent 0057894e
...@@ -36,6 +36,7 @@ from torch.distributed import Backend, ProcessGroup ...@@ -36,6 +36,7 @@ from torch.distributed import Backend, ProcessGroup
import vllm.envs as envs import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import supports_custom_op
@dataclass @dataclass
...@@ -95,32 +96,33 @@ def _register_group(group: "GroupCoordinator") -> None: ...@@ -95,32 +96,33 @@ def _register_group(group: "GroupCoordinator") -> None:
_groups[group.unique_name] = weakref.ref(group) # type: ignore _groups[group.unique_name] = weakref.ref(group) # type: ignore
@torch.library.custom_op("vllm::inplace_all_reduce", mutates_args=["tensor"]) if supports_custom_op():
def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None:
assert group_name in _groups, f"Group {group_name} is not found."
group = _groups[group_name]()
if group is None:
raise ValueError(f"Group {group_name} is destroyed.")
group._all_reduce(tensor)
@torch.library.custom_op("vllm::inplace_all_reduce",
mutates_args=["tensor"])
def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None:
assert group_name in _groups, f"Group {group_name} is not found."
group = _groups[group_name]()
if group is None:
raise ValueError(f"Group {group_name} is destroyed.")
group._all_reduce(tensor)
@inplace_all_reduce.register_fake @inplace_all_reduce.register_fake
def _(tensor: torch.Tensor, group_name: str) -> None: def _(tensor: torch.Tensor, group_name: str) -> None:
return return
@torch.library.custom_op("vllm::outplace_all_reduce", mutates_args=[])
def outplace_all_reduce(tensor: torch.Tensor, group_name: str) -> torch.Tensor:
assert group_name in _groups, f"Group {group_name} is not found."
group = _groups[group_name]()
if group is None:
raise ValueError(f"Group {group_name} is destroyed.")
return group._all_reduce(tensor)
@torch.library.custom_op("vllm::outplace_all_reduce", mutates_args=[])
def outplace_all_reduce(tensor: torch.Tensor,
group_name: str) -> torch.Tensor:
assert group_name in _groups, f"Group {group_name} is not found."
group = _groups[group_name]()
if group is None:
raise ValueError(f"Group {group_name} is destroyed.")
return group._all_reduce(tensor)
@outplace_all_reduce.register_fake @outplace_all_reduce.register_fake
def _(tensor: torch.Tensor, group_name: str) -> torch.Tensor: def _(tensor: torch.Tensor, group_name: str) -> torch.Tensor:
return torch.empty_like(tensor) return torch.empty_like(tensor)
class GroupCoordinator: class GroupCoordinator:
...@@ -335,6 +337,9 @@ class GroupCoordinator: ...@@ -335,6 +337,9 @@ class GroupCoordinator:
if self.world_size == 1: if self.world_size == 1:
return input_ return input_
if not supports_custom_op():
return self._all_reduce(input_)
if self.tpu_communicator is not None and \ if self.tpu_communicator is not None and \
not self.tpu_communicator.disabled: not self.tpu_communicator.disabled:
# TPU handles Dynamo with its own logic. # TPU handles Dynamo with its own logic.
......
...@@ -1245,6 +1245,12 @@ def supports_dynamo() -> bool: ...@@ -1245,6 +1245,12 @@ def supports_dynamo() -> bool:
return base_torch_version >= Version("2.4.0") return base_torch_version >= Version("2.4.0")
# Some backends use pytorch version < 2.4.0 which doesn't
# support `torch.library.custom_op`.
def supports_custom_op() -> bool:
    """Return whether the installed PyTorch exposes ``torch.library.custom_op``.

    The check is plain feature detection: it only asks whether the
    ``custom_op`` attribute exists on ``torch.library``, so callers can guard
    custom-op registration (and fall back to a direct call) on builds that
    lack the API.

    Returns:
        True if ``torch.library`` has a ``custom_op`` attribute, else False.
    """
    return hasattr(torch.library, "custom_op")
class AtomicCounter: class AtomicCounter:
"""An atomic, thread-safe counter""" """An atomic, thread-safe counter"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment