Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b2496bb0
Unverified
Commit
b2496bb0
authored
Feb 10, 2025
by
youkaichao
Committed by
GitHub
Feb 10, 2025
Browse files
[core] fix sleep mode and pytorch checkpoint compatibility (#13001)
Signed-off-by:
youkaichao
<
youkaichao@gmail.com
>
parent
44607e07
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
3 deletions
+8
-3
tests/basic_correctness/test_cumem.py
tests/basic_correctness/test_cumem.py
+8
-2
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/model_loader/weight_utils.py
+0
-1
No files found.
tests/basic_correctness/test_cumem.py
View file @
b2496bb0
...
@@ -115,10 +115,16 @@ def test_cumem_with_cudagraph():
...
@@ -115,10 +115,16 @@ def test_cumem_with_cudagraph():
@
fork_new_process_for_each_test
@
fork_new_process_for_each_test
def
test_end_to_end
():
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"meta-llama/Llama-3.2-1B"
,
# sleep mode with safetensors
"facebook/opt-125m"
# sleep mode with pytorch checkpoint
])
def
test_end_to_end
(
model
):
free
,
total
=
torch
.
cuda
.
mem_get_info
()
free
,
total
=
torch
.
cuda
.
mem_get_info
()
used_bytes_baseline
=
total
-
free
# in case other process is running
used_bytes_baseline
=
total
-
free
# in case other process is running
llm
=
LLM
(
"meta-llama/Llama-3.2-1B"
,
enable_sleep_mode
=
True
)
llm
=
LLM
(
model
,
enable_sleep_mode
=
True
)
prompt
=
"How are you?"
prompt
=
"How are you?"
sampling_params
=
SamplingParams
(
temperature
=
0
,
max_tokens
=
10
)
sampling_params
=
SamplingParams
(
temperature
=
0
,
max_tokens
=
10
)
output
=
llm
.
generate
(
prompt
,
sampling_params
)
output
=
llm
.
generate
(
prompt
,
sampling_params
)
...
...
vllm/model_executor/model_loader/weight_utils.py
View file @
b2496bb0
...
@@ -462,7 +462,6 @@ def pt_weights_iterator(
...
@@ -462,7 +462,6 @@ def pt_weights_iterator(
state
=
torch
.
load
(
bin_file
,
map_location
=
"cpu"
,
weights_only
=
True
)
state
=
torch
.
load
(
bin_file
,
map_location
=
"cpu"
,
weights_only
=
True
)
yield
from
state
.
items
()
yield
from
state
.
items
()
del
state
del
state
torch
.
cuda
.
empty_cache
()
def
get_gguf_extra_tensor_names
(
def
get_gguf_extra_tensor_names
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment