"tests/kernels/attention/test_encoder_decoder_attn.py" did not exist on "fd95e026e0f9f50bacf1a63ef419df8bacfc99c0"
Commit d9934804 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev' into v0.9.2-step3v

parents 56ebbba3 384b6bd9
...@@ -214,7 +214,7 @@ class P2pNcclConnector(KVConnectorBase_V1): ...@@ -214,7 +214,7 @@ class P2pNcclConnector(KVConnectorBase_V1):
kv_cache_layer = kv_cache[ \ kv_cache_layer = kv_cache[ \
forward_context.virtual_engine] forward_context.virtual_engine]
if self.p2p_nccl_engine.tensor_split_num == P2pNcclEngine.TENSOR_SPLIT_OFF: if not envs.VLLM_P2P_ASYNC:
kv_cache = self.p2p_nccl_engine.recv_tensor( kv_cache = self.p2p_nccl_engine.recv_tensor(
request.request_id + "#" + layer_name) request.request_id + "#" + layer_name)
......
...@@ -63,8 +63,6 @@ def set_p2p_nccl_context(num_channels: str): ...@@ -63,8 +63,6 @@ def set_p2p_nccl_context(num_channels: str):
class P2pNcclEngine: class P2pNcclEngine:
TENSOR_SPLIT_OFF = 0
def __init__(self, def __init__(self,
local_rank: int, local_rank: int,
config: KVTransferConfig, config: KVTransferConfig,
...@@ -368,8 +366,6 @@ class P2pNcclEngine: ...@@ -368,8 +366,6 @@ class P2pNcclEngine:
tensor_id = data["tensor_id"] tensor_id = data["tensor_id"]
if "tensor_split_num" in data: if "tensor_split_num" in data:
self.tensor_split_num = data["tensor_split_num"] self.tensor_split_num = data["tensor_split_num"]
else:
self.tensor_split_num= self.TENSOR_SPLIT_OFF
try: try:
with torch.cuda.stream(self.recv_stream): with torch.cuda.stream(self.recv_stream):
tensor = torch.empty(data["shape"], tensor = torch.empty(data["shape"],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment