Commit 384b6bd9 authored by zhuwenwen
Browse files

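修复pd分离开启异步发送时decode侧判断条件卡住 (Fix the decode side getting stuck on its branch condition when asynchronous sending is enabled with prefill/decode (PD) disaggregation)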

parent 72ada8dc
...@@ -214,7 +214,7 @@ class P2pNcclConnector(KVConnectorBase_V1): ...@@ -214,7 +214,7 @@ class P2pNcclConnector(KVConnectorBase_V1):
kv_cache_layer = kv_cache[ \ kv_cache_layer = kv_cache[ \
forward_context.virtual_engine] forward_context.virtual_engine]
if self.p2p_nccl_engine.tensor_split_num == P2pNcclEngine.TENSOR_SPLIT_OFF: if not envs.VLLM_P2P_ASYNC:
kv_cache = self.p2p_nccl_engine.recv_tensor( kv_cache = self.p2p_nccl_engine.recv_tensor(
request.request_id + "#" + layer_name) request.request_id + "#" + layer_name)
......
...@@ -62,8 +62,6 @@ def set_p2p_nccl_context(num_channels: str): ...@@ -62,8 +62,6 @@ def set_p2p_nccl_context(num_channels: str):
class P2pNcclEngine: class P2pNcclEngine:
TENSOR_SPLIT_OFF = 0
def __init__(self, def __init__(self,
local_rank: int, local_rank: int,
...@@ -368,8 +366,6 @@ class P2pNcclEngine: ...@@ -368,8 +366,6 @@ class P2pNcclEngine:
tensor_id = data["tensor_id"] tensor_id = data["tensor_id"]
if "tensor_split_num" in data: if "tensor_split_num" in data:
self.tensor_split_num = data["tensor_split_num"] self.tensor_split_num = data["tensor_split_num"]
else:
self.tensor_split_num= self.TENSOR_SPLIT_OFF
try: try:
with torch.cuda.stream(self.recv_stream): with torch.cuda.stream(self.recv_stream):
tensor = torch.empty(data["shape"], tensor = torch.empty(data["shape"],
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment