Commit ed392378 authored by zhuwenwen's avatar zhuwenwen
Browse files

[P/D][Feature]显存调度优化,及时释放tensor

parent 596c18f6
...@@ -215,6 +215,9 @@ class P2pNcclConnector(KVConnectorBase_V1): ...@@ -215,6 +215,9 @@ class P2pNcclConnector(KVConnectorBase_V1):
inject_kv_into_layer(kv_cache_layer, kv_cache, inject_kv_into_layer(kv_cache_layer, kv_cache,
request.slot_mapping, request.request_id) request.slot_mapping, request.request_id)
tensor = self.p2p_nccl_engine.recv_store.pop(request.request_id + "#" + layer_name, None)
if tensor is not None:
del tensor
def wait_for_layer_load(self, layer_name: str) -> None: def wait_for_layer_load(self, layer_name: str) -> None:
"""Blocking until the KV for a specific layer is loaded into vLLM's """Blocking until the KV for a specific layer is loaded into vLLM's
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment