Unverified commit 1948d0c4, authored by Michael Goin and committed by GitHub
Browse files

[UX] Defer some imports on CLI paths to save ~2s (#40056)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent 4c47710b
...@@ -8,7 +8,7 @@ from dataclasses import dataclass ...@@ -8,7 +8,7 @@ from dataclasses import dataclass
from functools import partial from functools import partial
from pathlib import Path from pathlib import Path
from types import TracebackType from types import TracebackType
from typing import ClassVar from typing import TYPE_CHECKING, ClassVar
from typing_extensions import Self, override from typing_extensions import Self, override
...@@ -17,20 +17,8 @@ from vllm.utils.import_utils import PlaceholderModule ...@@ -17,20 +17,8 @@ from vllm.utils.import_utils import PlaceholderModule
from .utils import sanitize_filename from .utils import sanitize_filename
try: if TYPE_CHECKING:
import matplotlib.pyplot as plt
except ImportError:
plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
try:
import pandas as pd import pandas as pd
except ImportError:
pd = PlaceholderModule("pandas")
try:
import seaborn as sns
except ImportError:
seaborn = PlaceholderModule("seaborn")
@dataclass @dataclass
...@@ -265,6 +253,20 @@ def _plot_fig( ...@@ -265,6 +253,20 @@ def _plot_fig(
fig_height: float, fig_height: float,
fig_dpi: int, fig_dpi: int,
): ):
# Lazy-import matplotlib/pandas/seaborn
try:
import matplotlib.pyplot as plt
except ImportError:
plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
try:
import pandas as pd
except ImportError:
pd = PlaceholderModule("pandas")
try:
import seaborn as sns
except ImportError:
sns = PlaceholderModule("seaborn")
fig_group, fig_data = fig_group_data fig_group, fig_data = fig_group_data
row_groups = full_groupby( row_groups = full_groupby(
......
...@@ -6,7 +6,7 @@ from concurrent.futures import ProcessPoolExecutor ...@@ -6,7 +6,7 @@ from concurrent.futures import ProcessPoolExecutor
from dataclasses import dataclass from dataclasses import dataclass
from functools import partial from functools import partial
from pathlib import Path from pathlib import Path
from typing import ClassVar from typing import TYPE_CHECKING, ClassVar
from vllm.utils.collection_utils import full_groupby from vllm.utils.collection_utils import full_groupby
from vllm.utils.import_utils import PlaceholderModule from vllm.utils.import_utils import PlaceholderModule
...@@ -14,20 +14,8 @@ from vllm.utils.import_utils import PlaceholderModule ...@@ -14,20 +14,8 @@ from vllm.utils.import_utils import PlaceholderModule
from .plot import DummyExecutor, _json_load_bytes from .plot import DummyExecutor, _json_load_bytes
from .utils import sanitize_filename from .utils import sanitize_filename
try: if TYPE_CHECKING:
import matplotlib.pyplot as plt
except ImportError:
plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
try:
import pandas as pd import pandas as pd
except ImportError:
pd = PlaceholderModule("pandas")
try:
import seaborn as sns
except ImportError:
seaborn = PlaceholderModule("seaborn")
def _first_present(run_data: dict[str, object], keys: list[str]): def _first_present(run_data: dict[str, object], keys: list[str]):
...@@ -195,6 +183,20 @@ def _plot_fig( ...@@ -195,6 +183,20 @@ def _plot_fig(
print("[END FIGURE]") print("[END FIGURE]")
return return
# Lazy-import matplotlib/pandas/seaborn
try:
import matplotlib.pyplot as plt
except ImportError:
plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
try:
import pandas as pd
except ImportError:
pd = PlaceholderModule("pandas")
try:
import seaborn as sns
except ImportError:
sns = PlaceholderModule("seaborn")
df = pd.DataFrame.from_records(fig_data) df = pd.DataFrame.from_records(fig_data)
df = df.dropna(subset=["tokens_per_user", "tokens_per_gpu"]) df = df.dropna(subset=["tokens_per_user", "tokens_per_gpu"])
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
from vllm.entrypoints.cli.benchmark.mm_processor import (
BenchmarkMMProcessorSubcommand,
)
from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
from vllm.entrypoints.cli.benchmark.startup import BenchmarkStartupSubcommand
from vllm.entrypoints.cli.benchmark.sweep import BenchmarkSweepSubcommand
from vllm.entrypoints.cli.benchmark.throughput import BenchmarkThroughputSubcommand
__all__: list[str] = [
"BenchmarkLatencySubcommand",
"BenchmarkMMProcessorSubcommand",
"BenchmarkServingSubcommand",
"BenchmarkStartupSubcommand",
"BenchmarkSweepSubcommand",
"BenchmarkThroughputSubcommand",
]
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import argparse import argparse
import sys
import typing import typing
from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
...@@ -14,6 +15,17 @@ else: ...@@ -14,6 +15,17 @@ else:
FlexibleArgumentParser = argparse.ArgumentParser FlexibleArgumentParser = argparse.ArgumentParser
def _import_bench_subcommand_modules() -> None:
    """Import each `vllm bench` subcommand module for its side effects.

    The imports are deferred to call time so that the
    `BenchmarkSubcommandBase` subclasses register only when `vllm bench`
    is actually invoked.
    """
    from importlib import import_module

    # Same modules, same order as explicit import statements would use.
    submodule_names = (
        "latency",
        "mm_processor",
        "serve",
        "startup",
        "sweep",
        "throughput",
    )
    for submodule in submodule_names:
        import_module(f"vllm.entrypoints.cli.benchmark.{submodule}")
class BenchmarkSubcommand(CLISubcommand): class BenchmarkSubcommand(CLISubcommand):
"""The `bench` subcommand for the vLLM CLI.""" """The `bench` subcommand for the vLLM CLI."""
...@@ -38,18 +50,28 @@ class BenchmarkSubcommand(CLISubcommand): ...@@ -38,18 +50,28 @@ class BenchmarkSubcommand(CLISubcommand):
) )
bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type") bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type")
for cmd_cls in BenchmarkSubcommandBase.__subclasses__(): # Only build the nested bench subparsers when the user is actually
cmd_subparser = bench_subparsers.add_parser( # invoking `bench`; otherwise we'd drag in imports
cmd_cls.name, # unnecessarily on every `vllm --help` and `vllm serve`.
help=cmd_cls.help, # Scan for the first positional arg so global flags (e.g. `-v`)
description=cmd_cls.help, # before the subcommand don't break detection.
usage=f"vllm {self.name} {cmd_cls.name} [options]", first_positional = next(
) (arg for arg in sys.argv[1:] if not arg.startswith("-")), None
cmd_subparser.set_defaults(dispatch_function=cmd_cls.cmd) )
cmd_cls.add_cli_args(cmd_subparser) if first_positional == self.name:
cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format( _import_bench_subcommand_modules()
subcmd=f"{self.name} {cmd_cls.name}" for cmd_cls in BenchmarkSubcommandBase.__subclasses__():
) cmd_subparser = bench_subparsers.add_parser(
cmd_cls.name,
help=cmd_cls.help,
description=cmd_cls.help,
usage=f"vllm {self.name} {cmd_cls.name} [options]",
)
cmd_subparser.set_defaults(dispatch_function=cmd_cls.cmd)
cmd_cls.add_cli_args(cmd_subparser)
cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
subcmd=f"{self.name} {cmd_cls.name}"
)
return bench_parser return bench_parser
......
...@@ -100,10 +100,9 @@ logger = init_logger(__name__) ...@@ -100,10 +100,9 @@ logger = init_logger(__name__)
# it avoids unintentional cuda initialization from torch.cuda.is_available() # it avoids unintentional cuda initialization from torch.cuda.is_available()
os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] = "1" os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] = "1"
# see https://github.com/vllm-project/vllm/issues/10480 # see https://github.com/vllm-project/vllm/issues/10480 and
# https://github.com/vllm-project/vllm/issues/10619.
os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1" os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
# see https://github.com/vllm-project/vllm/issues/10619
torch._inductor.config.compile_threads = 1
# Enable Triton autotuning result caching to disk by default. # Enable Triton autotuning result caching to disk by default.
# Without this, Triton re-runs autotuning on every process restart, # Without this, Triton re-runs autotuning on every process restart,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment