Commit bfe12894 authored by xuxzh1's avatar xuxzh1 🎱
Browse files

完善释放包括cpu和tensor_id释放

parent 0627b53a
...@@ -215,9 +215,18 @@ class P2pNcclConnector(KVConnectorBase_V1): ...@@ -215,9 +215,18 @@ class P2pNcclConnector(KVConnectorBase_V1):
inject_kv_into_layer(kv_cache_layer, kv_cache, inject_kv_into_layer(kv_cache_layer, kv_cache,
request.slot_mapping, request.request_id) request.slot_mapping, request.request_id)
tensor = self.p2p_nccl_engine.recv_store.pop(request.request_id + "#" + layer_name, None) tensor_id = request.request_id + "#" + layer_name
if tensor is not None: if tensor_id in self.p2p_nccl_engine.recv_store:
del tensor tensor = self.p2p_nccl_engine.recv_store.pop(tensor_id, None)
self.p2p_nccl_engine.send_request_id_to_tensor_ids.pop(
request.request_id, None)
self.p2p_nccl_engine.recv_request_id_to_tensor_ids.pop(
request.request_id, None)
addr = 0
if isinstance(tensor, tuple):
addr, _, _ = tensor
self.p2p_nccl_engine.pool.free(addr)
def wait_for_layer_load(self, layer_name: str) -> None: def wait_for_layer_load(self, layer_name: str) -> None:
"""Blocking until the KV for a specific layer is loaded into vLLM's """Blocking until the KV for a specific layer is loaded into vLLM's
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment