Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1948d0c4
Unverified
Commit
1948d0c4
authored
Apr 16, 2026
by
Michael Goin
Committed by
GitHub
Apr 16, 2026
Browse files
[UX] Defer some imports on CLI paths to save ~2s (#40056)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
4c47710b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
68 additions
and
60 deletions
+68
-60
vllm/benchmarks/sweep/plot.py
vllm/benchmarks/sweep/plot.py
+16
-14
vllm/benchmarks/sweep/plot_pareto.py
vllm/benchmarks/sweep/plot_pareto.py
+16
-14
vllm/entrypoints/cli/__init__.py
vllm/entrypoints/cli/__init__.py
+0
-17
vllm/entrypoints/cli/benchmark/main.py
vllm/entrypoints/cli/benchmark/main.py
+34
-12
vllm/env_override.py
vllm/env_override.py
+2
-3
No files found.
vllm/benchmarks/sweep/plot.py
View file @
1948d0c4
...
@@ -8,7 +8,7 @@ from dataclasses import dataclass
...
@@ -8,7 +8,7 @@ from dataclasses import dataclass
from
functools
import
partial
from
functools
import
partial
from
pathlib
import
Path
from
pathlib
import
Path
from
types
import
TracebackType
from
types
import
TracebackType
from
typing
import
ClassVar
from
typing
import
TYPE_CHECKING
,
ClassVar
from
typing_extensions
import
Self
,
override
from
typing_extensions
import
Self
,
override
...
@@ -17,20 +17,8 @@ from vllm.utils.import_utils import PlaceholderModule
...
@@ -17,20 +17,8 @@ from vllm.utils.import_utils import PlaceholderModule
from
.utils
import
sanitize_filename
from
.utils
import
sanitize_filename
try
:
if
TYPE_CHECKING
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
import
pandas
as
pd
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
seaborn
=
PlaceholderModule
(
"seaborn"
)
@
dataclass
@
dataclass
...
@@ -265,6 +253,20 @@ def _plot_fig(
...
@@ -265,6 +253,20 @@ def _plot_fig(
fig_height
:
float
,
fig_height
:
float
,
fig_dpi
:
int
,
fig_dpi
:
int
,
):
):
# Lazy-import matplotlib/pandas/seaborn
try
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
sns
=
PlaceholderModule
(
"seaborn"
)
fig_group
,
fig_data
=
fig_group_data
fig_group
,
fig_data
=
fig_group_data
row_groups
=
full_groupby
(
row_groups
=
full_groupby
(
...
...
vllm/benchmarks/sweep/plot_pareto.py
View file @
1948d0c4
...
@@ -6,7 +6,7 @@ from concurrent.futures import ProcessPoolExecutor
...
@@ -6,7 +6,7 @@ from concurrent.futures import ProcessPoolExecutor
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
functools
import
partial
from
functools
import
partial
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
ClassVar
from
typing
import
TYPE_CHECKING
,
ClassVar
from
vllm.utils.collection_utils
import
full_groupby
from
vllm.utils.collection_utils
import
full_groupby
from
vllm.utils.import_utils
import
PlaceholderModule
from
vllm.utils.import_utils
import
PlaceholderModule
...
@@ -14,20 +14,8 @@ from vllm.utils.import_utils import PlaceholderModule
...
@@ -14,20 +14,8 @@ from vllm.utils.import_utils import PlaceholderModule
from
.plot
import
DummyExecutor
,
_json_load_bytes
from
.plot
import
DummyExecutor
,
_json_load_bytes
from
.utils
import
sanitize_filename
from
.utils
import
sanitize_filename
try
:
if
TYPE_CHECKING
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
import
pandas
as
pd
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
seaborn
=
PlaceholderModule
(
"seaborn"
)
def
_first_present
(
run_data
:
dict
[
str
,
object
],
keys
:
list
[
str
]):
def
_first_present
(
run_data
:
dict
[
str
,
object
],
keys
:
list
[
str
]):
...
@@ -195,6 +183,20 @@ def _plot_fig(
...
@@ -195,6 +183,20 @@ def _plot_fig(
print
(
"[END FIGURE]"
)
print
(
"[END FIGURE]"
)
return
return
# Lazy-import matplotlib/pandas/seaborn
try
:
import
matplotlib.pyplot
as
plt
except
ImportError
:
plt
=
PlaceholderModule
(
"matplotlib"
).
placeholder_attr
(
"pyplot"
)
try
:
import
pandas
as
pd
except
ImportError
:
pd
=
PlaceholderModule
(
"pandas"
)
try
:
import
seaborn
as
sns
except
ImportError
:
sns
=
PlaceholderModule
(
"seaborn"
)
df
=
pd
.
DataFrame
.
from_records
(
fig_data
)
df
=
pd
.
DataFrame
.
from_records
(
fig_data
)
df
=
df
.
dropna
(
subset
=
[
"tokens_per_user"
,
"tokens_per_gpu"
])
df
=
df
.
dropna
(
subset
=
[
"tokens_per_user"
,
"tokens_per_gpu"
])
...
...
vllm/entrypoints/cli/__init__.py
View file @
1948d0c4
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
vllm.entrypoints.cli.benchmark.latency
import
BenchmarkLatencySubcommand
from
vllm.entrypoints.cli.benchmark.mm_processor
import
(
BenchmarkMMProcessorSubcommand
,
)
from
vllm.entrypoints.cli.benchmark.serve
import
BenchmarkServingSubcommand
from
vllm.entrypoints.cli.benchmark.startup
import
BenchmarkStartupSubcommand
from
vllm.entrypoints.cli.benchmark.sweep
import
BenchmarkSweepSubcommand
from
vllm.entrypoints.cli.benchmark.throughput
import
BenchmarkThroughputSubcommand
__all__
:
list
[
str
]
=
[
"BenchmarkLatencySubcommand"
,
"BenchmarkMMProcessorSubcommand"
,
"BenchmarkServingSubcommand"
,
"BenchmarkStartupSubcommand"
,
"BenchmarkSweepSubcommand"
,
"BenchmarkThroughputSubcommand"
,
]
vllm/entrypoints/cli/benchmark/main.py
View file @
1948d0c4
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
argparse
import
argparse
import
sys
import
typing
import
typing
from
vllm.entrypoints.cli.benchmark.base
import
BenchmarkSubcommandBase
from
vllm.entrypoints.cli.benchmark.base
import
BenchmarkSubcommandBase
...
@@ -14,6 +15,17 @@ else:
...
@@ -14,6 +15,17 @@ else:
FlexibleArgumentParser
=
argparse
.
ArgumentParser
FlexibleArgumentParser
=
argparse
.
ArgumentParser
def
_import_bench_subcommand_modules
()
->
None
:
# Imported lazily so `BenchmarkSubcommandBase` subclasses register only
# when `vllm bench` is actually invoked.
import
vllm.entrypoints.cli.benchmark.latency
# noqa: F401
import
vllm.entrypoints.cli.benchmark.mm_processor
# noqa: F401
import
vllm.entrypoints.cli.benchmark.serve
# noqa: F401
import
vllm.entrypoints.cli.benchmark.startup
# noqa: F401
import
vllm.entrypoints.cli.benchmark.sweep
# noqa: F401
import
vllm.entrypoints.cli.benchmark.throughput
# noqa: F401
class
BenchmarkSubcommand
(
CLISubcommand
):
class
BenchmarkSubcommand
(
CLISubcommand
):
"""The `bench` subcommand for the vLLM CLI."""
"""The `bench` subcommand for the vLLM CLI."""
...
@@ -38,18 +50,28 @@ class BenchmarkSubcommand(CLISubcommand):
...
@@ -38,18 +50,28 @@ class BenchmarkSubcommand(CLISubcommand):
)
)
bench_subparsers
=
bench_parser
.
add_subparsers
(
required
=
True
,
dest
=
"bench_type"
)
bench_subparsers
=
bench_parser
.
add_subparsers
(
required
=
True
,
dest
=
"bench_type"
)
for
cmd_cls
in
BenchmarkSubcommandBase
.
__subclasses__
():
# Only build the nested bench subparsers when the user is actually
cmd_subparser
=
bench_subparsers
.
add_parser
(
# invoking `bench`; otherwise we'd drag in imports
cmd_cls
.
name
,
# unnecessarily on every `vllm --help` and `vllm serve`.
help
=
cmd_cls
.
help
,
# Scan for the first positional arg so global flags (e.g. `-v`)
description
=
cmd_cls
.
help
,
# before the subcommand don't break detection.
usage
=
f
"vllm
{
self
.
name
}
{
cmd_cls
.
name
}
[options]"
,
first_positional
=
next
(
)
(
arg
for
arg
in
sys
.
argv
[
1
:]
if
not
arg
.
startswith
(
"-"
)),
None
cmd_subparser
.
set_defaults
(
dispatch_function
=
cmd_cls
.
cmd
)
)
cmd_cls
.
add_cli_args
(
cmd_subparser
)
if
first_positional
==
self
.
name
:
cmd_subparser
.
epilog
=
VLLM_SUBCMD_PARSER_EPILOG
.
format
(
_import_bench_subcommand_modules
()
subcmd
=
f
"
{
self
.
name
}
{
cmd_cls
.
name
}
"
for
cmd_cls
in
BenchmarkSubcommandBase
.
__subclasses__
():
)
cmd_subparser
=
bench_subparsers
.
add_parser
(
cmd_cls
.
name
,
help
=
cmd_cls
.
help
,
description
=
cmd_cls
.
help
,
usage
=
f
"vllm
{
self
.
name
}
{
cmd_cls
.
name
}
[options]"
,
)
cmd_subparser
.
set_defaults
(
dispatch_function
=
cmd_cls
.
cmd
)
cmd_cls
.
add_cli_args
(
cmd_subparser
)
cmd_subparser
.
epilog
=
VLLM_SUBCMD_PARSER_EPILOG
.
format
(
subcmd
=
f
"
{
self
.
name
}
{
cmd_cls
.
name
}
"
)
return
bench_parser
return
bench_parser
...
...
vllm/env_override.py
View file @
1948d0c4
...
@@ -100,10 +100,9 @@ logger = init_logger(__name__)
...
@@ -100,10 +100,9 @@ logger = init_logger(__name__)
# it avoids unintentional cuda initialization from torch.cuda.is_available()
# it avoids unintentional cuda initialization from torch.cuda.is_available()
os
.
environ
[
"PYTORCH_NVML_BASED_CUDA_CHECK"
]
=
"1"
os
.
environ
[
"PYTORCH_NVML_BASED_CUDA_CHECK"
]
=
"1"
# see https://github.com/vllm-project/vllm/issues/10480
# see https://github.com/vllm-project/vllm/issues/10480 and
# https://github.com/vllm-project/vllm/issues/10619.
os
.
environ
[
"TORCHINDUCTOR_COMPILE_THREADS"
]
=
"1"
os
.
environ
[
"TORCHINDUCTOR_COMPILE_THREADS"
]
=
"1"
# see https://github.com/vllm-project/vllm/issues/10619
torch
.
_inductor
.
config
.
compile_threads
=
1
# Enable Triton autotuning result caching to disk by default.
# Enable Triton autotuning result caching to disk by default.
# Without this, Triton re-runs autotuning on every process restart,
# Without this, Triton re-runs autotuning on every process restart,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment