Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1948d0c4
Unverified
Commit
1948d0c4
authored
Apr 16, 2026
by
Michael Goin
Committed by
GitHub
Apr 16, 2026
Browse files
[UX] Defer some imports on CLI paths to save ~2s (#40056)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
4c47710b
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
68 additions
and
60 deletions
+68
-60
vllm/benchmarks/sweep/plot.py
vllm/benchmarks/sweep/plot.py
+16
-14
vllm/benchmarks/sweep/plot_pareto.py
vllm/benchmarks/sweep/plot_pareto.py
+16
-14
vllm/entrypoints/cli/__init__.py
vllm/entrypoints/cli/__init__.py
+0
-17
vllm/entrypoints/cli/benchmark/main.py
vllm/entrypoints/cli/benchmark/main.py
+34
-12
vllm/env_override.py
vllm/env_override.py
+2
-3
No files found.
vllm/benchmarks/sweep/plot.py
View file @
1948d0c4
...
...
@@ -8,7 +8,7 @@ from dataclasses import dataclass
from
functools
import
partial
from
pathlib
import
Path
from
types
import
TracebackType
from
typing
import
ClassVar
from
typing
import
TYPE_CHECKING
,
ClassVar
from
typing_extensions
import
Self
,
override
...
...
@@ -17,20 +17,8 @@ from vllm.utils.import_utils import PlaceholderModule
from
.utils
import
sanitize_filename
try
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
if
TYPE_CHECKING
:
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
seaborn
=
PlaceholderModule
(
"seaborn"
)
@
dataclass
...
...
@@ -265,6 +253,20 @@ def _plot_fig(
fig_height
:
float
,
fig_dpi
:
int
,
):
# Lazy-import matplotlib/pandas/seaborn
try
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
sns
=
PlaceholderModule
(
"seaborn"
)
fig_group
,
fig_data
=
fig_group_data
row_groups
=
full_groupby
(
...
...
vllm/benchmarks/sweep/plot_pareto.py
View file @
1948d0c4
...
...
@@ -6,7 +6,7 @@ from concurrent.futures import ProcessPoolExecutor
from
dataclasses
import
dataclass
from
functools
import
partial
from
pathlib
import
Path
from
typing
import
ClassVar
from
typing
import
TYPE_CHECKING
,
ClassVar
from
vllm.utils.collection_utils
import
full_groupby
from
vllm.utils.import_utils
import
PlaceholderModule
...
...
@@ -14,20 +14,8 @@ from vllm.utils.import_utils import PlaceholderModule
from
.plot
import
DummyExecutor
,
_json_load_bytes
from
.utils
import
sanitize_filename
try
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
if
TYPE_CHECKING
:
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
seaborn
=
PlaceholderModule
(
"seaborn"
)
def
_first_present
(
run_data
:
dict
[
str
,
object
],
keys
:
list
[
str
]):
...
...
@@ -195,6 +183,20 @@ def _plot_fig(
print
(
"[END FIGURE]"
)
return
# Lazy-import matplotlib/pandas/seaborn
try
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
sns
=
PlaceholderModule
(
"seaborn"
)
df
=
pd
.
DataFrame
.
from_records
(
fig_data
)
df
=
df
.
dropna
(
subset
=
[
"tokens_per_user"
,
"tokens_per_gpu"
])
...
...
vllm/entrypoints/cli/__init__.py
View file @
1948d0c4
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
vllm.entrypoints.cli.benchmark.latency
import
BenchmarkLatencySubcommand
from
vllm.entrypoints.cli.benchmark.mm_processor
import
(
BenchmarkMMProcessorSubcommand
,
)
from
vllm.entrypoints.cli.benchmark.serve
import
BenchmarkServingSubcommand
from
vllm.entrypoints.cli.benchmark.startup
import
BenchmarkStartupSubcommand
from
vllm.entrypoints.cli.benchmark.sweep
import
BenchmarkSweepSubcommand
from
vllm.entrypoints.cli.benchmark.throughput
import
BenchmarkThroughputSubcommand
__all__
:
list
[
str
]
=
[
"BenchmarkLatencySubcommand"
,
"BenchmarkMMProcessorSubcommand"
,
"BenchmarkServingSubcommand"
,
"BenchmarkStartupSubcommand"
,
"BenchmarkSweepSubcommand"
,
"BenchmarkThroughputSubcommand"
,
]
vllm/entrypoints/cli/benchmark/main.py
View file @
1948d0c4
...
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
argparse
import
sys
import
typing
from
vllm.entrypoints.cli.benchmark.base
import
BenchmarkSubcommandBase
...
...
@@ -14,6 +15,17 @@ else:
FlexibleArgumentParser
=
argparse
.
ArgumentParser
def
_import_bench_subcommand_modules
()
->
None
:
# Imported lazily so `BenchmarkSubcommandBase` subclasses register only
# when `vllm bench` is actually invoked.
import
vllm.entrypoints.cli.benchmark.latency
# noqa: F401
import
vllm.entrypoints.cli.benchmark.mm_processor
# noqa: F401
import
vllm.entrypoints.cli.benchmark.serve
# noqa: F401
import
vllm.entrypoints.cli.benchmark.startup
# noqa: F401
import
vllm.entrypoints.cli.benchmark.sweep
# noqa: F401
import
vllm.entrypoints.cli.benchmark.throughput
# noqa: F401
class
BenchmarkSubcommand
(
CLISubcommand
):
"""The `bench` subcommand for the vLLM CLI."""
...
...
@@ -38,6 +50,16 @@ class BenchmarkSubcommand(CLISubcommand):
)
bench_subparsers
=
bench_parser
.
add_subparsers
(
required
=
True
,
dest
=
"bench_type"
)
# Only build the nested bench subparsers when the user is actually
# invoking `bench`; otherwise we'd drag in imports
# unnecessarily on every `vllm --help` and `vllm serve`.
# Scan for the first positional arg so global flags (e.g. `-v`)
# before the subcommand don't break detection.
first_positional
=
next
(
(
arg
for
arg
in
sys
.
argv
[
1
:]
if
not
arg
.
startswith
(
"-"
)),
None
)
if
first_positional
==
self
.
name
:
_import_bench_subcommand_modules
()
for
cmd_cls
in
BenchmarkSubcommandBase
.
__subclasses__
():
cmd_subparser
=
bench_subparsers
.
add_parser
(
cmd_cls
.
name
,
...
...
vllm/env_override.py
View file @
1948d0c4
...
...
@@ -100,10 +100,9 @@ logger = init_logger(__name__)
# it avoids unintentional cuda initialization from torch.cuda.is_available()
os
.
environ
[
"PYTORCH_NVML_BASED_CUDA_CHECK"
]
=
"1"
# see https://github.com/vllm-project/vllm/issues/10480
# see https://github.com/vllm-project/vllm/issues/10480 and
# https://github.com/vllm-project/vllm/issues/10619.
os
.
environ
[
"TORCHINDUCTOR_COMPILE_THREADS"
]
=
"1"
# see https://github.com/vllm-project/vllm/issues/10619
torch
.
_inductor
.
config
.
compile_threads
=
1
# Enable Triton autotuning result caching to disk by default.
# Without this, Triton re-runs autotuning on every process restart,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment