Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f56d2996
Unverified
Commit f56d2996 authored Jul 11, 2025 by lkchen
Committed by GitHub Jul 11, 2025
Browse files
[Misc] Respect `no_use_tqdm_on_load` flag while capturing CUDA graph (#20834)
Signed-off-by: Linkun <github@lkchen.net>
parent
147afb44
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 5 additions and 2 deletions
+5
-2
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+4
-2
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+1
-0
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
f56d2996
...
...
@@ -2270,8 +2270,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
# Only rank 0 should print progress bar during capture
compilation_cases
=
reversed
(
self
.
cudagraph_batch_sizes
)
if
is_global_first_rank
():
compilation_cases
=
tqdm
(
list
(
compilation_cases
),
desc
=
"Capturing CUDA graph shapes"
)
compilation_cases
=
tqdm
(
list
(
compilation_cases
),
disable
=
not
self
.
load_config
.
use_tqdm_on_load
,
desc
=
"Capturing CUDA graph shapes"
)
for
num_tokens
in
compilation_cases
:
# We skip EPLB here since we don't want to record dummy metrics
for
_
in
range
(
...
...
vllm/worker/model_runner.py
View file @
f56d2996
...
...
@@ -1587,6 +1587,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
if
get_tensor_model_parallel_rank
()
==
0
:
compilation_cases
=
tqdm
(
list
(
compilation_cases
),
disable
=
not
self
.
load_config
.
use_tqdm_on_load
,
desc
=
"Capturing CUDA graph shapes"
)
for
batch_size
,
use_inputs_embeds
in
compilation_cases
:
attn_metadata
=
(
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment