Unverified Commit 7ff65b19 authored by czhu-cohere, committed by GitHub
Browse files

[Bugfix] Fix workspace resize leaking reserved GPU memory (#39226)


Signed-off-by: root <conway.zhu@cohere.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
parent 7f95a66c
...@@ -161,36 +161,33 @@ class WorkspaceManager: ...@@ -161,36 +161,33 @@ class WorkspaceManager:
"Workspace growth is not allowed after locking." "Workspace growth is not allowed after locking."
) )
for ubatch_id in range(self._num_ubatches): # Only resize the requesting ubatch's workspace. Other
current_workspace = self._current_workspaces[ubatch_id] # ubatches resize lazily on their next get_simultaneous call.
if ( # Resizing all ubatches here would orphan the other ubatch's
current_workspace is None # old tensor when it still holds views into it (DBO leak).
or self._workspace_size_bytes(current_workspace) < required_bytes self._current_workspaces[ubatch_id] = None
): del current_workspace
# Delete old tensor before allocating new one to avoid # Release the freed segment back to CUDA so the caching
# memory spike from resize_(). resize_() allocates new # allocator can reuse the GPU memory for the larger
# memory before freeing old, which can cause OOM. # allocation below. Without this, each resize may leave a
# Must clear the list reference first since local var # dead segment in reserved memory which can cause higher peak
# is just a copy of the reference. # memory usage.
self._current_workspaces[ubatch_id] = None torch.accelerator.empty_cache()
del current_workspace self._current_workspaces[ubatch_id] = torch.empty(
self._current_workspaces[ubatch_id] = torch.empty( (required_bytes,), dtype=torch.uint8, device=self._device
(required_bytes,), dtype=torch.uint8, device=self._device )
) current_workspace = self._current_workspaces[ubatch_id]
if envs.VLLM_DEBUG_WORKSPACE: if envs.VLLM_DEBUG_WORKSPACE:
logger.info( logger.info(
"[WORKSPACE DEBUG] Resized workspace from '%s': %.2f MB -> " "[WORKSPACE DEBUG] Resized workspace from '%s': %.2f MB -> "
"%.2f MB (%d ubatches, total memory %.2f MB)", "%.2f MB (ubatch %d)",
get_caller_info(), get_caller_info(),
current_size / _MB, current_size / _MB,
required_bytes / _MB, required_bytes / _MB,
self._num_ubatches, ubatch_id,
required_bytes * self._num_ubatches / _MB,
) )
current_workspace = self._current_workspaces[dbo_current_ubatch_id()]
return current_workspace return current_workspace
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment