Unverified commit 8a57872b, authored by Tyler Michael Smith and committed by GitHub
Browse files

[Bugfix][EP+DP] Use pplx-kernel internode instead of intranode (#19034)


Signed-off-by: Tyler Michael Smith <tysmith@redhat.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent 5bc1ad6c
......@@ -83,6 +83,10 @@ class PPLXAll2AllManager(All2AllManagerBase):
assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa
super().__init__(cpu_group)
# TODO(tms): Disable pplx-a2a intranode as it fails with the error:
# failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa
self.internode = True
if self.internode:
# inter-node communication needs nvshmem,
# intra-node communication uses p2p mapping directly
......
......@@ -269,9 +269,13 @@ class FusedMoEMethodBase(QuantizeMethodBase):
hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else (
(moe.hidden_dim + moe.block_size - 1) // moe.block_size *
torch.float32.itemsize)),
group_name=all2all_manager.cpu_group.group_name,
)
# Intranode pplx a2a takes a group name while internode does not.
if not all2all_manager.internode:
all_to_all_args[
"group_name"] = all2all_manager.cpu_group.group_name
handle = all2all_manager.get_handle(all_to_all_args)
prepare_finalize = PplxPrepareAndFinalize(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment