Commit 21da7334 (unverified), authored by Roger Wang, committed by GitHub
Browse files

[Misc] Clean up flags in `vllm bench serve` (#25138)


Signed-off-by: Roger Wang <hey@rogerw.io>
parent 66072b36
...@@ -156,7 +156,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct ...@@ -156,7 +156,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct
```bash ```bash
vllm bench serve \ vllm bench serve \
--backend openai-chat \ --backend openai-chat \
--endpoint-type openai-chat \
--model Qwen/Qwen2-VL-7B-Instruct \ --model Qwen/Qwen2-VL-7B-Instruct \
--endpoint /v1/chat/completions \ --endpoint /v1/chat/completions \
--dataset-name hf \ --dataset-name hf \
...@@ -230,7 +229,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct ...@@ -230,7 +229,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct
```bash ```bash
vllm bench serve \ vllm bench serve \
--backend openai-chat \ --backend openai-chat \
--endpoint-type openai-chat \
--model Qwen/Qwen2-VL-7B-Instruct \ --model Qwen/Qwen2-VL-7B-Instruct \
--endpoint /v1/chat/completions \ --endpoint /v1/chat/completions \
--dataset-name hf \ --dataset-name hf \
...@@ -245,7 +243,6 @@ vllm bench serve \ ...@@ -245,7 +243,6 @@ vllm bench serve \
```bash ```bash
vllm bench serve \ vllm bench serve \
--backend openai-chat \ --backend openai-chat \
--endpoint-type openai-chat \
--model Qwen/Qwen2-VL-7B-Instruct \ --model Qwen/Qwen2-VL-7B-Instruct \
--endpoint /v1/chat/completions \ --endpoint /v1/chat/completions \
--dataset-name hf \ --dataset-name hf \
......
...@@ -68,7 +68,7 @@ def test_bench_serve_chat(server): ...@@ -68,7 +68,7 @@ def test_bench_serve_chat(server):
"5", "5",
"--endpoint", "--endpoint",
"/v1/chat/completions", "/v1/chat/completions",
"--endpoint-type", "--backend",
"openai-chat", "openai-chat",
] ]
result = subprocess.run(command, capture_output=True, text=True) result = subprocess.run(command, capture_output=True, text=True)
......
...@@ -1358,7 +1358,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: ...@@ -1358,7 +1358,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
elif args.dataset_name == "sonnet": elif args.dataset_name == "sonnet":
dataset = SonnetDataset(dataset_path=args.dataset_path) dataset = SonnetDataset(dataset_path=args.dataset_path)
# For the "sonnet" dataset, formatting depends on the backend. # For the "sonnet" dataset, formatting depends on the backend.
if args.endpoint_type == "openai-chat": if args.backend == "openai-chat":
input_requests = dataset.sample( input_requests = dataset.sample(
num_requests=args.num_prompts, num_requests=args.num_prompts,
input_len=args.sonnet_input_len, input_len=args.sonnet_input_len,
...@@ -1462,7 +1462,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: ...@@ -1462,7 +1462,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
"Please consider contributing if you would " "Please consider contributing if you would "
"like to add support for additional dataset formats.") "like to add support for additional dataset formats.")
if dataset_class.IS_MULTIMODAL and args.endpoint_type not in [ if dataset_class.IS_MULTIMODAL and args.backend not in [
"openai-chat", "openai-chat",
"openai-audio", "openai-audio",
]: ]:
...@@ -1470,7 +1470,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: ...@@ -1470,7 +1470,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
# endpoint-type. # endpoint-type.
raise ValueError( raise ValueError(
"Multi-modal content is only supported on 'openai-chat' and " "Multi-modal content is only supported on 'openai-chat' and "
"'openai-audio' endpoint-type.") "'openai-audio' backends.")
input_requests = dataset_class( input_requests = dataset_class(
dataset_path=args.dataset_path, dataset_path=args.dataset_path,
dataset_subset=args.hf_subset, dataset_subset=args.hf_subset,
...@@ -1563,7 +1563,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: ...@@ -1563,7 +1563,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
try: try:
# Enforce endpoint compatibility for multimodal datasets. # Enforce endpoint compatibility for multimodal datasets.
if args.dataset_name == "random-mm" and args.endpoint_type not in [ if args.dataset_name == "random-mm" and args.backend not in [
"openai-chat"]: "openai-chat"]:
raise ValueError( raise ValueError(
"Multi-modal content (images) is only supported on " "Multi-modal content (images) is only supported on "
......
...@@ -8,8 +8,8 @@ to launch the vLLM OpenAI API server: ...@@ -8,8 +8,8 @@ to launch the vLLM OpenAI API server:
On the client side, run: On the client side, run:
vllm bench serve \ vllm bench serve \
--endpoint-type <endpoint_type. Default 'openai'> \ --backend <backend or endpoint type. Default 'openai'> \
--label <benchmark result label. Default using endpoint_type> \ --label <benchmark result label. Default using backend> \
--model <your_model> \ --model <your_model> \
--dataset-name <dataset_name. Default 'random'> \ --dataset-name <dataset_name. Default 'random'> \
--request-rate <request_rate. Default inf> \ --request-rate <request_rate. Default inf> \
...@@ -52,6 +52,21 @@ TERM_PLOTLIB_AVAILABLE = ((importlib.util.find_spec("termplotlib") is not None) ...@@ -52,6 +52,21 @@ TERM_PLOTLIB_AVAILABLE = ((importlib.util.find_spec("termplotlib") is not None)
and (shutil.which("gnuplot") is not None)) and (shutil.which("gnuplot") is not None))
# TODO: Remove this in v0.11.0
class DeprecatedEndpointTypeAction(argparse.Action):
    """Argparse action for the deprecated ``--endpoint-type`` flag.

    Each time the flag appears on the command line this action:

    1. emits a deprecation warning pointing users at ``--backend``;
    2. stores the value under the flag's own destination (``self.dest``);
    3. forwards the value to ``namespace.backend`` when ``backend`` is still
       at its parser default, so invocations that only pass the deprecated
       flag keep selecting the requested endpoint type instead of silently
       falling back to the default backend.

    An explicitly supplied, non-default ``--backend`` always takes
    precedence over the deprecated flag, regardless of argument order.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Warn about the deprecation and record the supplied value.

        Args:
            parser: The ``ArgumentParser`` that owns this action; used only
                to look up the default of ``--backend``.
            namespace: The namespace being populated by argparse.
            values: The value given to ``--endpoint-type``.
            option_string: The option string that triggered this action.
        """
        warnings.warn(
            "'--endpoint-type' is deprecated and will be removed in v0.11.0. "
            "Please use '--backend' instead or remove this argument if you "
            "have already set it.",
            stacklevel=1,
        )
        setattr(namespace, self.dest, values)

        # Only override ``backend`` while it still holds the parser default:
        # a value the user set explicitly via ``--backend`` must win. If
        # ``--backend`` appears later on the command line, argparse's normal
        # store action overwrites whatever is forwarded here.
        backend_default = parser.get_default("backend")
        if getattr(namespace, "backend", backend_default) == backend_default:
            namespace.backend = values
class TaskType(Enum): class TaskType(Enum):
GENERATION = "generation" GENERATION = "generation"
EMBEDDING = "embedding" EMBEDDING = "embedding"
...@@ -470,7 +485,7 @@ async def benchmark( ...@@ -470,7 +485,7 @@ async def benchmark(
else: else:
request_func = ASYNC_REQUEST_FUNCS[endpoint_type] request_func = ASYNC_REQUEST_FUNCS[endpoint_type]
else: else:
raise ValueError(f"Unknown endpoint_type: {endpoint_type}") raise ValueError(f"Unknown backend: {endpoint_type}")
# Reuses connections across requests to reduce TLS handshake overhead. # Reuses connections across requests to reduce TLS handshake overhead.
connector = aiohttp.TCPConnector( connector = aiohttp.TCPConnector(
...@@ -850,24 +865,28 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace, ...@@ -850,24 +865,28 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
def add_cli_args(parser: argparse.ArgumentParser): def add_cli_args(parser: argparse.ArgumentParser):
add_dataset_parser(parser) add_dataset_parser(parser)
parser.add_argument(
"--endpoint-type",
type=str,
default="openai",
choices=list(ASYNC_REQUEST_FUNCS.keys()),
)
parser.add_argument( parser.add_argument(
"--label", "--label",
type=str, type=str,
default=None, default=None,
help="The label (prefix) of the benchmark results. If not specified, " help="The label (prefix) of the benchmark results. If not specified, "
"the endpoint type will be used as the label.", "the value of '--backend' will be used as the label.",
) )
parser.add_argument( parser.add_argument(
"--backend", "--backend",
type=str, type=str,
default="vllm", default="openai",
choices=list(ASYNC_REQUEST_FUNCS.keys()),
help="The type of backend or endpoint to use for the benchmark."
)
parser.add_argument(
"--endpoint-type",
type=str,
default=None,
choices=list(ASYNC_REQUEST_FUNCS.keys()), choices=list(ASYNC_REQUEST_FUNCS.keys()),
action=DeprecatedEndpointTypeAction,
help="'--endpoint-type' is deprecated and will be removed in v0.11.0. "
"Please use '--backend' instead.",
) )
parser.add_argument( parser.add_argument(
"--base-url", "--base-url",
...@@ -1165,7 +1184,6 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: ...@@ -1165,7 +1184,6 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
raise ValueError( raise ValueError(
"For exponential ramp-up, the start RPS cannot be 0.") "For exponential ramp-up, the start RPS cannot be 0.")
endpoint_type = args.endpoint_type
label = args.label label = args.label
model_id = args.model model_id = args.model
model_name = args.served_model_name model_name = args.served_model_name
...@@ -1228,7 +1246,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: ...@@ -1228,7 +1246,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
gc.freeze() gc.freeze()
benchmark_result = await benchmark( benchmark_result = await benchmark(
endpoint_type=args.endpoint_type, endpoint_type=args.backend,
api_url=api_url, api_url=api_url,
base_url=base_url, base_url=base_url,
model_id=model_id, model_id=model_id,
...@@ -1262,7 +1280,8 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: ...@@ -1262,7 +1280,8 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
# Setup # Setup
current_dt = datetime.now().strftime("%Y%m%d-%H%M%S") current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")
result_json["date"] = current_dt result_json["date"] = current_dt
result_json["endpoint_type"] = args.endpoint_type result_json["endpoint_type"] = args.backend # for backward compatibility
result_json["backend"] = args.backend
result_json["label"] = label result_json["label"] = label
result_json["model_id"] = model_id result_json["model_id"] = model_id
result_json["tokenizer_id"] = tokenizer_id result_json["tokenizer_id"] = tokenizer_id
...@@ -1312,7 +1331,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: ...@@ -1312,7 +1331,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
base_model_id = model_id.split("/")[-1] base_model_id = model_id.split("/")[-1]
max_concurrency_str = (f"-concurrency{args.max_concurrency}" max_concurrency_str = (f"-concurrency{args.max_concurrency}"
if args.max_concurrency is not None else "") if args.max_concurrency is not None else "")
label = label or endpoint_type label = label or args.backend
if args.ramp_up_strategy is not None: if args.ramp_up_strategy is not None:
file_name = f"{label}-ramp-up-{args.ramp_up_strategy}-{args.ramp_up_start_rps}qps-{args.ramp_up_end_rps}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json" # noqa file_name = f"{label}-ramp-up-{args.ramp_up_strategy}-{args.ramp_up_start_rps}qps-{args.ramp_up_end_rps}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json" # noqa
else: else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment