Unverified commit 7ff65b19, authored by czhu-cohere and committed by GitHub
Browse files

[Bugfix] Fix workspace resize leaking reserved GPU memory (#39226)


Signed-off-by: root <conway.zhu@cohere.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
parent 7f95a66c
......@@ -161,36 +161,33 @@ class WorkspaceManager:
"Workspace growth is not allowed after locking."
)
for ubatch_id in range(self._num_ubatches):
current_workspace = self._current_workspaces[ubatch_id]
if (
current_workspace is None
or self._workspace_size_bytes(current_workspace) < required_bytes
):
# Delete old tensor before allocating new one to avoid
# memory spike from resize_(). resize_() allocates new
# memory before freeing old, which can cause OOM.
# Must clear the list reference first since local var
# is just a copy of the reference.
# Only resize the requesting ubatch's workspace. Other
# ubatches resize lazily on their next get_simultaneous call.
# Resizing all ubatches here would orphan the other ubatch's
# old tensor when it still holds views into it (DBO leak).
self._current_workspaces[ubatch_id] = None
del current_workspace
# Release the freed segment back to CUDA so the caching
# allocator can reuse the GPU memory for the larger
# allocation below. Without this, each resize may leave a
# dead segment in reserved memory which can cause higher peak
# memory usage.
torch.accelerator.empty_cache()
self._current_workspaces[ubatch_id] = torch.empty(
(required_bytes,), dtype=torch.uint8, device=self._device
)
current_workspace = self._current_workspaces[ubatch_id]
if envs.VLLM_DEBUG_WORKSPACE:
logger.info(
"[WORKSPACE DEBUG] Resized workspace from '%s': %.2f MB -> "
"%.2f MB (%d ubatches, total memory %.2f MB)",
"%.2f MB (ubatch %d)",
get_caller_info(),
current_size / _MB,
required_bytes / _MB,
self._num_ubatches,
required_bytes * self._num_ubatches / _MB,
ubatch_id,
)
current_workspace = self._current_workspaces[dbo_current_ubatch_id()]
return current_workspace
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment