Unverified commit 5bbbeae3, authored by hhzhang16, committed by GitHub
Browse files

fix: port forward to available ports when benchmarking (#2795)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent d39d676b
...@@ -55,21 +55,26 @@ OPTIONS: ...@@ -55,21 +55,26 @@ OPTIONS:
--verbose Enable verbose output --verbose Enable verbose output
EXAMPLES: EXAMPLES:
# Compare aggregated vs disaggregated Dynamo deployments # Compare Dynamo deployments of a single backend
$0 --namespace \$NAMESPACE \\ $0 --namespace \$NAMESPACE \\
--input agg=components/backends/vllm/deploy/agg.yaml \\ --input agg=components/backends/vllm/deploy/agg.yaml \\
--input disagg=components/backends/vllm/deploy/disagg.yaml --input disagg=components/backends/vllm/deploy/disagg.yaml
# Compare different backend types (vLLM vs TensorRT-LLM)
$0 --namespace \$NAMESPACE \\
--input vllm-agg=components/backends/vllm/deploy/agg.yaml \\
--input trtllm-agg=components/backends/trtllm/deploy/agg.yaml
# Compare Dynamo deployment vs external endpoint # Compare Dynamo deployment vs external endpoint
$0 --namespace \$NAMESPACE \\ $0 --namespace \$NAMESPACE \\
--input dynamo=components/backends/vllm/deploy/disagg.yaml \\ --input dynamo=components/backends/vllm/deploy/disagg.yaml \\
--input external=http://localhost:8000 --input external=http://localhost:8000
# Compare three different configurations # Compare multiple different configurations (vLLM, TensorRT-LLM, SGLang)
$0 --namespace \$NAMESPACE \\ $0 --namespace \$NAMESPACE \\
--input dynamo-agg=components/backends/vllm/deploy/agg.yaml \\ --input vllm-agg=components/backends/vllm/deploy/agg.yaml \\
--input dynamo-disagg=components/backends/vllm/deploy/disagg.yaml \\ --input trtllm-disagg=components/backends/trtllm/deploy/disagg.yaml \\
--input external-vllm=http://localhost:8000 --input existing-sglang=http://localhost:8000
# Benchmark a single Dynamo deployment # Benchmark a single Dynamo deployment
$0 --namespace \$NAMESPACE \\ $0 --namespace \$NAMESPACE \\
......
...@@ -17,6 +17,7 @@ import argparse ...@@ -17,6 +17,7 @@ import argparse
import asyncio import asyncio
import os import os
import re import re
import socket
import subprocess import subprocess
import sys import sys
import time import time
...@@ -29,6 +30,24 @@ import kubernetes_asyncio as kubernetes ...@@ -29,6 +30,24 @@ import kubernetes_asyncio as kubernetes
import yaml import yaml
from kubernetes_asyncio import client, config from kubernetes_asyncio import client, config
def find_available_port(start_port: int = 8000, max_attempts: int = 100) -> int:
    """Return the first TCP port on 127.0.0.1 that can currently be bound.

    Ports are probed in ascending order, starting at ``start_port``
    (inclusive) and covering at most ``max_attempts`` consecutive ports.
    The scan never goes past 65535, the highest valid TCP port.

    The probe socket is closed before returning, so the port is only known
    to have been free at the time of the check; it is not reserved for the
    caller.

    Args:
        start_port: First port to try.
        max_attempts: Maximum number of consecutive ports to try.

    Returns:
        The first port in the scanned range that could be bound.

    Raises:
        ValueError: If ``start_port`` is negative.
        RuntimeError: If no port in the scanned range could be bound.
    """
    if start_port < 0:
        raise ValueError(f"start_port must be non-negative, got {start_port}")

    highest_valid_port = 65535
    # socket.bind() raises OverflowError (not OSError) for ports above 65535,
    # which would escape the handler below, so clamp the scan to valid ports.
    scan_stop = min(start_port + max_attempts, highest_valid_port + 1)

    for port in range(start_port, scan_stop):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                # NOTE(review): SO_REUSEADDR presumably keeps ports that are
                # merely lingering in TIME_WAIT from being reported as busy.
                s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                s.bind(("127.0.0.1", port))
        except OSError:
            # Port is in use (or otherwise not bindable); try the next one.
            continue
        return port

    raise RuntimeError(
        f"No available ports found in range {start_port}-{start_port + max_attempts - 1}"
    )
# Example chat completion request for testing deployments # Example chat completion request for testing deployments
EXAMPLE_CHAT_REQUEST = { EXAMPLE_CHAT_REQUEST = {
"model": "Qwen/Qwen3-0.6B", "model": "Qwen/Qwen3-0.6B",
...@@ -119,14 +138,21 @@ class DynamoDeploymentClient: ...@@ -119,14 +138,21 @@ class DynamoDeploymentClient:
self.custom_api = client.CustomObjectsApi(self.k8s_client) self.custom_api = client.CustomObjectsApi(self.k8s_client)
self.core_api = client.CoreV1Api(self.k8s_client) self.core_api = client.CoreV1Api(self.k8s_client)
def port_forward_frontend(self, local_port: int = 8000, quiet: bool = False) -> str: def port_forward_frontend(
self, local_port: Optional[int] = None, quiet: bool = False
) -> str:
""" """
Port forward the frontend service to a local port. Port forward the frontend service to a local port.
Args: Args:
local_port: Local port to forward to (default: 8000) local_port: Local port to forward to (if None, find first available port starting from 8000)
quiet: If True, suppress kubectl port-forward output messages (default: False) quiet: If True, suppress kubectl port-forward output messages (default: False)
""" """
if local_port is None:
local_port = find_available_port(8000)
if not quiet:
print(f"Using available local port: {local_port}")
cmd = [ cmd = [
"kubectl", "kubectl",
"port-forward", "port-forward",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment