Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d15c3b90
Unverified
Commit
d15c3b90
authored
Mar 03, 2026
by
Nick Hill
Committed by
GitHub
Mar 03, 2026
Browse files
[Core] Move save_tensorized_model logic to Worker (#35825)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
97286a20
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
17 deletions
+6
-17
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+1
-12
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/gpu_worker.py
+5
-5
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
d15c3b90
...
...
@@ -58,7 +58,7 @@ from vllm.model_executor.layers.rotary_embedding import (
MRotaryEmbedding
,
XDRotaryEmbedding
,
)
from
vllm.model_executor.model_loader
import
TensorizerLoader
,
get_model_loader
from
vllm.model_executor.model_loader
import
get_model_loader
from
vllm.model_executor.model_loader.reload
import
(
finalize_layerwise_reload
,
initialize_layerwise_reload
,
...
...
@@ -194,7 +194,6 @@ from .utils import (
)
if
TYPE_CHECKING
:
from
vllm.model_executor.model_loader.tensorizer
import
TensorizerConfig
from
vllm.v1.core.sched.output
import
GrammarOutput
,
SchedulerOutput
from
vllm.v1.spec_decode.ngram_proposer
import
NgramProposer
...
...
@@ -4510,16 +4509,6 @@ class GPUModelRunner(
weights_not_loaded
,
)
def
save_tensorized_model
(
self
,
tensorizer_config
:
"TensorizerConfig"
,
)
->
None
:
TensorizerLoader
.
save_model
(
self
.
get_model
(),
tensorizer_config
=
tensorizer_config
,
model_config
=
self
.
model_config
,
)
def
_get_prompt_logprobs_dict
(
self
,
hidden_states
:
torch
.
Tensor
,
...
...
vllm/v1/worker/gpu_worker.py
View file @
d15c3b90
...
...
@@ -57,6 +57,7 @@ from vllm.v1.worker.utils import is_residual_scattered_for_sp
from
vllm.v1.worker.worker_base
import
WorkerBase
from
vllm.v1.worker.workspace
import
init_workspace_manager
from
...model_executor.model_loader
import
TensorizerLoader
from
.gpu.warmup
import
warmup_kernels
from
.utils
import
request_memory
...
...
@@ -836,12 +837,11 @@ class Worker(WorkerBase):
max_size
=
max_size
,
)
def
save_tensorized_model
(
self
,
tensorizer_config
:
"TensorizerConfig"
,
)
->
None
:
self
.
model_runner
.
save_tensorized_model
(
def
save_tensorized_model
(
self
,
tensorizer_config
:
"TensorizerConfig"
)
->
None
:
TensorizerLoader
.
save_model
(
self
.
get_model
(),
tensorizer_config
=
tensorizer_config
,
model_config
=
self
.
model_config
,
)
def
init_weight_transfer_engine
(
self
,
init_info
:
dict
)
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment