Unverified Commit 5bbbeae3 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

fix: port forward to available ports when benchmarking (#2795)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent d39d676b
......@@ -55,21 +55,26 @@ OPTIONS:
--verbose Enable verbose output
EXAMPLES:
# Compare aggregated vs disaggregated Dynamo deployments
# Compare Dynamo deployments of a single backend
$0 --namespace \$NAMESPACE \\
--input agg=components/backends/vllm/deploy/agg.yaml \\
--input disagg=components/backends/vllm/deploy/disagg.yaml
# Compare different backend types (vLLM vs TensorRT-LLM)
$0 --namespace \$NAMESPACE \\
--input vllm-agg=components/backends/vllm/deploy/agg.yaml \\
--input trtllm-agg=components/backends/trtllm/deploy/agg.yaml
# Compare Dynamo deployment vs external endpoint
$0 --namespace \$NAMESPACE \\
--input dynamo=components/backends/vllm/deploy/disagg.yaml \\
--input external=http://localhost:8000
# Compare three different configurations
# Compare multiple different configurations (vLLM, TensorRT-LLM, SGLang)
$0 --namespace \$NAMESPACE \\
--input dynamo-agg=components/backends/vllm/deploy/agg.yaml \\
--input dynamo-disagg=components/backends/vllm/deploy/disagg.yaml \\
--input external-vllm=http://localhost:8000
--input vllm-agg=components/backends/vllm/deploy/agg.yaml \\
--input trtllm-disagg=components/backends/trtllm/deploy/disagg.yaml \\
--input existing-sglang=http://localhost:8000
# Benchmark a single Dynamo deployment
$0 --namespace \$NAMESPACE \\
......
......@@ -17,6 +17,7 @@ import argparse
import asyncio
import os
import re
import socket
import subprocess
import sys
import time
......@@ -29,6 +30,24 @@ import kubernetes_asyncio as kubernetes
import yaml
from kubernetes_asyncio import client, config
def find_available_port(start_port: int = 8000, max_attempts: int = 100) -> int:
    """Return the first TCP port on 127.0.0.1 that can currently be bound.

    Ports are probed in ascending order starting at ``start_port`` (inclusive).
    At most ``max_attempts`` consecutive ports are tried, and the scan never
    goes past 65535, the highest valid TCP port.

    The probe socket is closed before this function returns, so the result is
    only a best-effort hint: another process may still grab the port before
    the caller binds it.

    Args:
        start_port: First port to probe; must be in the range 1..65535.
        max_attempts: Maximum number of consecutive ports to probe (>= 1).

    Returns:
        The first port in the scanned range that could be bound.

    Raises:
        ValueError: If ``start_port`` is outside 1..65535 or ``max_attempts``
            is smaller than 1.
        RuntimeError: If no port in the scanned range could be bound.
    """
    highest_port = 65535
    if not 1 <= start_port <= highest_port:
        raise ValueError(
            f"start_port must be between 1 and {highest_port}, got {start_port}"
        )
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be at least 1, got {max_attempts}")

    # Clamp the scan so bind() is never handed a port above 65535; that would
    # raise OverflowError, which is not an OSError and would escape the loop.
    end_port = min(start_port + max_attempts - 1, highest_port)

    # On Windows SO_REUSEADDR lets bind() succeed on a port that is already in
    # use, which would defeat the probe; SO_EXCLUSIVEADDRUSE (Windows-only)
    # gives the "fail if taken" semantics instead.
    exclusive_opt = getattr(socket, "SO_EXCLUSIVEADDRUSE", None)

    for port in range(start_port, end_port + 1):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                if exclusive_opt is not None:
                    probe.setsockopt(socket.SOL_SOCKET, exclusive_opt, 1)
                else:
                    probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                probe.bind(("127.0.0.1", port))
        except OSError:
            # Port is taken (or not permitted, e.g. privileged); try the next.
            continue
        return port

    raise RuntimeError(f"No available ports found in range {start_port}-{end_port}")
# Example chat completion request for testing deployments
EXAMPLE_CHAT_REQUEST = {
"model": "Qwen/Qwen3-0.6B",
......@@ -119,14 +138,21 @@ class DynamoDeploymentClient:
self.custom_api = client.CustomObjectsApi(self.k8s_client)
self.core_api = client.CoreV1Api(self.k8s_client)
def port_forward_frontend(self, local_port: int = 8000, quiet: bool = False) -> str:
def port_forward_frontend(
self, local_port: Optional[int] = None, quiet: bool = False
) -> str:
"""
Port forward the frontend service to a local port.
Args:
local_port: Local port to forward to (default: 8000)
local_port: Local port to forward to (if None, find first available port starting from 8000)
quiet: If True, suppress kubectl port-forward output messages (default: False)
"""
if local_port is None:
local_port = find_available_port(8000)
if not quiet:
print(f"Using available local port: {local_port}")
cmd = [
"kubectl",
"port-forward",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment