Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
59389c92
Unverified
Commit
59389c92
authored Jul 10, 2025 by Nick Hill
Committed by GitHub on Jul 10, 2025
Browse files
[BugFix][CPU] Fix CPU worker dependency on cumem_allocator (#20696)
Signed-off-by: Nick Hill <nhill@redhat.com>
parent
8f2720de
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 9 additions and 1 deletion
+9
-1
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/gpu_worker.py
+9
-1
No files found.
vllm/v1/worker/gpu_worker.py
View file @
59389c92
...
...
@@ -11,7 +11,6 @@ import torch.nn as nn
import
vllm.envs
as
envs
from
vllm.config
import
VllmConfig
from
vllm.device_allocator.cumem
import
CuMemAllocator
from
vllm.distributed
import
(
ensure_model_parallel_initialized
,
init_distributed_environment
,
set_custom_all_reduce
)
...
...
@@ -79,6 +78,8 @@ class Worker(WorkerBase):
self
.
profiler
=
None
def
sleep
(
self
,
level
:
int
=
1
)
->
None
:
from
vllm.device_allocator.cumem
import
CuMemAllocator
free_bytes_before_sleep
=
torch
.
cuda
.
mem_get_info
()[
0
]
# Save the buffers before level 2 sleep
...
...
@@ -101,6 +102,8 @@ class Worker(WorkerBase):
used_bytes
/
GiB_bytes
)
def
wake_up
(
self
,
tags
:
Optional
[
list
[
str
]]
=
None
)
->
None
:
from
vllm.device_allocator.cumem
import
CuMemAllocator
allocator
=
CuMemAllocator
.
get_instance
()
allocator
.
wake_up
(
tags
)
...
...
@@ -174,6 +177,8 @@ class Worker(WorkerBase):
# to hijack tensor allocation.
def
load_model
(
self
)
->
None
:
if
self
.
vllm_config
.
model_config
.
enable_sleep_mode
:
from
vllm.device_allocator.cumem
import
CuMemAllocator
allocator
=
CuMemAllocator
.
get_instance
()
assert
allocator
.
get_current_usage
()
==
0
,
(
"Sleep mode can only be "
...
...
@@ -241,7 +246,10 @@ class Worker(WorkerBase):
def
initialize_from_config
(
self
,
kv_cache_config
:
KVCacheConfig
)
->
None
:
"""Allocate GPU KV cache with the specified kv_cache_config."""
if
self
.
vllm_config
.
model_config
.
enable_sleep_mode
:
from
vllm.device_allocator.cumem
import
CuMemAllocator
allocator
=
CuMemAllocator
.
get_instance
()
context
=
allocator
.
use_memory_pool
(
tag
=
"kv_cache"
)
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.