Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b880ffb8
Unverified
Commit
b880ffb8
authored
Dec 19, 2024
by
Michael Goin
Committed by
GitHub
Dec 20, 2024
Browse files
[Misc] Add tqdm progress bar during graph capture (#11349)
Signed-off-by: mgoin <michael@neuralmagic.com>
parent
7801f56e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
5 deletions
+13
-5
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+13
-5
No files found.
vllm/worker/model_runner.py
View file @
b880ffb8
...
@@ -13,6 +13,7 @@ import numpy as np
...
@@ -13,6 +13,7 @@ import numpy as np
import
torch
import
torch
import
torch.distributed
import
torch.distributed
import
torch.nn
as
nn
import
torch.nn
as
nn
from
tqdm
import
tqdm
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
...
@@ -21,7 +22,8 @@ from vllm.attention.backends.utils import CommonAttentionState
...
@@ -21,7 +22,8 @@ from vllm.attention.backends.utils import CommonAttentionState
from
vllm.config
import
CompilationLevel
,
VllmConfig
from
vllm.config
import
CompilationLevel
,
VllmConfig
from
vllm.core.scheduler
import
SchedulerOutputs
from
vllm.core.scheduler
import
SchedulerOutputs
from
vllm.distributed
import
get_kv_transfer_group
,
get_pp_group
from
vllm.distributed
import
get_kv_transfer_group
,
get_pp_group
from
vllm.distributed.parallel_state
import
graph_capture
from
vllm.distributed.parallel_state
import
(
get_tensor_model_parallel_rank
,
graph_capture
)
from
vllm.forward_context
import
set_forward_context
from
vllm.forward_context
import
set_forward_context
from
vllm.inputs
import
INPUT_REGISTRY
,
InputRegistry
from
vllm.inputs
import
INPUT_REGISTRY
,
InputRegistry
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
@@ -1413,8 +1415,8 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
...
@@ -1413,8 +1415,8 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
logger
.
info
(
"Capturing cudagraphs for decoding. This may lead to "
logger
.
info
(
"Capturing cudagraphs for decoding. This may lead to "
"unexpected consequences if the model is not static. To "
"unexpected consequences if the model is not static. To "
"run the model in eager mode, set 'enforce_eager=True' or "
"run the model in eager mode, set 'enforce_eager=True' or "
"use '--enforce-eager' in the CLI."
)
"use '--enforce-eager' in the CLI. "
logger
.
info
(
"If out-of-memory error occurs during cudagraph capture,"
"If out-of-memory error occurs during cudagraph capture,"
" consider decreasing `gpu_memory_utilization` or "
" consider decreasing `gpu_memory_utilization` or "
"switching to eager mode. You can also reduce the "
"switching to eager mode. You can also reduce the "
"`max_num_seqs` as needed to decrease memory usage."
)
"`max_num_seqs` as needed to decrease memory usage."
)
...
@@ -1451,8 +1453,14 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
...
@@ -1451,8 +1453,14 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
# memory usage of CUDA graph.
# memory usage of CUDA graph.
for
virtual_engine
in
range
(
for
virtual_engine
in
range
(
self
.
parallel_config
.
pipeline_parallel_size
):
self
.
parallel_config
.
pipeline_parallel_size
):
for
batch_size
in
\
self
.
vllm_config
.
compilation_config
.
capture_sizes
:
# Only rank 0 should print progress bar during capture
capture_sizes
=
(
tqdm
(
self
.
vllm_config
.
compilation_config
.
capture_sizes
,
desc
=
"Capturing CUDA graph shapes"
,
)
if
get_tensor_model_parallel_rank
()
==
0
else
self
.
vllm_config
.
compilation_config
.
capture_sizes
)
for
batch_size
in
capture_sizes
:
attn_metadata
=
(
attn_metadata
=
(
self
.
attn_state
.
graph_capture_get_metadata_for_batch
(
self
.
attn_state
.
graph_capture_get_metadata_for_batch
(
batch_size
,
batch_size
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment