Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0c730268
Unverified
Commit
0c730268
authored
Feb 14, 2025
by
Woosuk Kwon
Committed by
GitHub
Feb 14, 2025
Browse files
[V1][PP] Fix memory profiling in PP (#13315)
Signed-off-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
6a854c7a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
5 deletions
+6
-5
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+6
-5
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
0c730268
...
@@ -1158,11 +1158,12 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -1158,11 +1158,12 @@ class GPUModelRunner(LoRAModelRunnerMixin):
# Trigger compilation for general shape.
# Trigger compilation for general shape.
hidden_states
=
self
.
_dummy_run
(
self
.
max_num_tokens
,
hidden_states
=
self
.
_dummy_run
(
self
.
max_num_tokens
,
dummy_kv_caches
)
dummy_kv_caches
)
if
not
get_pp_group
().
is_last_rank
:
if
get_pp_group
().
is_last_rank
:
return
hidden_states
hidden_states
=
hidden_states
[
logit_indices
]
hidden_states
=
hidden_states
[
logit_indices
]
logits
=
self
.
model
.
compute_logits
(
hidden_states
,
None
)
logits
=
self
.
model
.
compute_logits
(
hidden_states
,
None
)
# TODO(woosuk): Consider the memory usage of the sampler.
# TODO(woosuk): Consider the memory usage of the sampler.
else
:
logits
=
None
torch
.
cuda
.
synchronize
()
torch
.
cuda
.
synchronize
()
del
hidden_states
,
logits
del
hidden_states
,
logits
self
.
encoder_cache
.
clear
()
self
.
encoder_cache
.
clear
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment