@@ -84,10 +84,6 @@ class PPLXAll2AllManager(All2AllManagerBase):
...
@@ -84,10 +84,6 @@ class PPLXAll2AllManager(All2AllManagerBase):
asserthas_pplx,"pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels."# noqa
asserthas_pplx,"pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels."# noqa
super().__init__(cpu_group)
super().__init__(cpu_group)
# TODO(tms): Disable pplx-a2a intranode as it fails with the error:
# failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa
self.internode=True
ifself.internode:
ifself.internode:
# inter-node communication needs nvshmem,
# inter-node communication needs nvshmem,
# intra-node communication uses p2p mapping directly
# intra-node communication uses p2p mapping directly
...
@@ -178,7 +174,6 @@ class DeepEPHTAll2AllManager(DeepEPAll2AllManagerBase):
...
@@ -178,7 +174,6 @@ class DeepEPHTAll2AllManager(DeepEPAll2AllManagerBase):
num_rdma_bytes=1024*1024*1024
num_rdma_bytes=1024*1024*1024
num_qps_per_rank=self.num_sms//2
num_qps_per_rank=self.num_sms//2
else:
else:
assertself.intranode
num_rdma_bytes=0
num_rdma_bytes=0
num_qps_per_rank=1
num_qps_per_rank=1
...
@@ -243,7 +238,6 @@ class DeepEPLLAll2AllManager(DeepEPAll2AllManagerBase):
...
@@ -243,7 +238,6 @@ class DeepEPLLAll2AllManager(DeepEPAll2AllManagerBase):