Unverified Commit 111991fe authored by ZhouXingg's avatar ZhouXingg Committed by GitHub
Browse files

Fix Regression: Disable p2p for 4090 (#531)


Co-authored-by: Qubitium <417764+Qubitium@users.noreply.github.com>
parent a8c787d2
...@@ -241,7 +241,7 @@ class ModelRunner: ...@@ -241,7 +241,7 @@ class ModelRunner:
logger.info(f"[gpu_id={self.gpu_id}] Set cuda device.") logger.info(f"[gpu_id={self.gpu_id}] Set cuda device.")
torch.cuda.set_device(self.gpu_id) torch.cuda.set_device(self.gpu_id)
logger.info(f"[gpu_id={self.gpu_id}] Init nccl begin.") logger.info(f"[gpu_id={self.gpu_id}] Init nccl begin.")
monkey_patch_vllm_p2p_access_check() monkey_patch_vllm_p2p_access_check(self.gpu_id)
init_distributed_environment( init_distributed_environment(
backend="nccl", backend="nccl",
world_size=self.tp_size, world_size=self.tp_size,
......
...@@ -453,14 +453,18 @@ def kill_parent_process(): ...@@ -453,14 +453,18 @@ def kill_parent_process():
os.kill(parent_process.pid, 9) os.kill(parent_process.pid, 9)
def monkey_patch_vllm_p2p_access_check(gpu_id: int):
    """Monkey-patch vllm's slow GPU peer-to-peer access check.

    vllm probes p2p connectivity at startup, which is slow; we replace the
    probe with a stub that always reports p2p as available.

    NOTE: Assuming p2p is always allowed can be wrong for some setups, so
    GPUs whose device name contains "RTX 40" are skipped entirely: the
    RTX 40 series has no p2p support, and even running the probe on them
    can raise errors (regression fixed by #531).

    Args:
        gpu_id: CUDA device index whose name is inspected to decide
            whether the patch is safe to apply.
    """
    # TODO: need a better check than just dev str name match
    # compat: skip RTX 40 series as they do not have the p2p feature and
    # even checking for them may cause errors.
    device_name = torch.cuda.get_device_name(gpu_id)
    if "RTX 40" not in device_name:
        # Import lazily so this module stays usable when vllm internals move.
        import vllm.distributed.device_communicators.custom_all_reduce_utils as tgt

        # Replace the real probe with an always-True stub.
        setattr(tgt, "gpu_p2p_access_check", lambda *args, **kwargs: True)
API_KEY_HEADER_NAME = "X-API-Key" API_KEY_HEADER_NAME = "X-API-Key"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment