Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5bbbeae3
Unverified
Commit
5bbbeae3
authored
Sep 02, 2025
by
hhzhang16
Committed by
GitHub
Sep 02, 2025
Browse files
fix: port forward to available ports when benchmarking (#2795)
Signed-off-by:
Hannah Zhang
<
hannahz@nvidia.com
>
parent
d39d676b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
7 deletions
+38
-7
benchmarks/benchmark.sh
benchmarks/benchmark.sh
+10
-5
deploy/utils/dynamo_deployment.py
deploy/utils/dynamo_deployment.py
+28
-2
No files found.
benchmarks/benchmark.sh
View file @
5bbbeae3
...
@@ -55,21 +55,26 @@ OPTIONS:
...
@@ -55,21 +55,26 @@ OPTIONS:
--verbose Enable verbose output
--verbose Enable verbose output
EXAMPLES:
EXAMPLES:
# Compare aggregated vs disaggregated Dynamo deployments
# Compare Dynamo deployments of a single backend
$0
--namespace
\$
NAMESPACE
\\
$0
--namespace
\$
NAMESPACE
\\
--input agg=components/backends/vllm/deploy/agg.yaml
\\
--input agg=components/backends/vllm/deploy/agg.yaml
\\
--input disagg=components/backends/vllm/deploy/disagg.yaml
--input disagg=components/backends/vllm/deploy/disagg.yaml
# Compare different backend types (vLLM vs TensorRT-LLM)
$0
--namespace
\$
NAMESPACE
\\
--input vllm-agg=components/backends/vllm/deploy/agg.yaml
\\
--input trtllm-agg=components/backends/trtllm/deploy/agg.yaml
# Compare Dynamo deployment vs external endpoint
# Compare Dynamo deployment vs external endpoint
$0
--namespace
\$
NAMESPACE
\\
$0
--namespace
\$
NAMESPACE
\\
--input dynamo=components/backends/vllm/deploy/disagg.yaml
\\
--input dynamo=components/backends/vllm/deploy/disagg.yaml
\\
--input external=http://localhost:8000
--input external=http://localhost:8000
# Compare three different configurations
# Compare multiple different configurations (vLLM, TensorRT-LLM, SGLang)
$0
--namespace
\$
NAMESPACE
\\
$0
--namespace
\$
NAMESPACE
\\
--input
dynamo
-agg=components/backends/vllm/deploy/agg.yaml
\\
--input
vllm
-agg=components/backends/vllm/deploy/agg.yaml
\\
--input
dynamo
-disagg=components/backends/
v
llm/deploy/disagg.yaml
\\
--input
trtllm
-disagg=components/backends/
trt
llm/deploy/disagg.yaml
\\
--input ex
ternal-vllm
=http://localhost:8000
--input ex
isting-sglang
=http://localhost:8000
# Benchmark a single Dynamo deployment
# Benchmark a single Dynamo deployment
$0
--namespace
\$
NAMESPACE
\\
$0
--namespace
\$
NAMESPACE
\\
...
...
deploy/utils/dynamo_deployment.py
View file @
5bbbeae3
...
@@ -17,6 +17,7 @@ import argparse
...
@@ -17,6 +17,7 @@ import argparse
import
asyncio
import
asyncio
import
os
import
os
import
re
import
re
import
socket
import
subprocess
import
subprocess
import
sys
import
sys
import
time
import
time
...
@@ -29,6 +30,24 @@ import kubernetes_asyncio as kubernetes
...
@@ -29,6 +30,24 @@ import kubernetes_asyncio as kubernetes
import
yaml
import
yaml
from
kubernetes_asyncio
import
client
,
config
from
kubernetes_asyncio
import
client
,
config
def find_available_port(start_port: int = 8000, max_attempts: int = 100) -> int:
    """Find the first TCP port on 127.0.0.1 that can be bound, scanning upward.

    Ports are probed in ascending order starting at ``start_port`` (inclusive).
    At most ``max_attempts`` consecutive ports are tried, and the scan never
    goes past 65535, the highest valid TCP port.

    The probe socket is closed before this function returns, so the result
    only says the port could be bound at probe time; nothing is reserved.

    Args:
        start_port: First port to try; must be in 0..65535.
        max_attempts: Maximum number of consecutive ports to probe. The
            default of 100 scans start_port..start_port+99. Must be >= 1.

    Returns:
        The first port in the scanned range for which ``bind`` succeeded.

    Raises:
        ValueError: If ``start_port`` is outside 0..65535 or ``max_attempts``
            is less than 1.
        RuntimeError: If no port in the scanned range could be bound.
    """
    max_port = 65535
    if not 0 <= start_port <= max_port:
        raise ValueError(f"start_port must be in 0..{max_port}, got {start_port}")
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")

    # Clamp the scan: bind() raises OverflowError (not OSError) for ports
    # above 65535, which would otherwise escape the handler below.
    last_port = min(start_port + max_attempts - 1, max_port)

    for port in range(start_port, last_port + 1):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                # NOTE(review): SO_REUSEADDR changes what counts as "bindable"
                # and its semantics differ per platform -- confirm this matches
                # how the eventual consumer of the port binds.
                s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                s.bind(("127.0.0.1", port))
                return port
        except OSError:
            # Port could not be bound (or the probe socket failed); try the next.
            continue

    raise RuntimeError(f"No available ports found in range {start_port}-{last_port}")
# Example chat completion request for testing deployments
# Example chat completion request for testing deployments
EXAMPLE_CHAT_REQUEST
=
{
EXAMPLE_CHAT_REQUEST
=
{
"model"
:
"Qwen/Qwen3-0.6B"
,
"model"
:
"Qwen/Qwen3-0.6B"
,
...
@@ -119,14 +138,21 @@ class DynamoDeploymentClient:
...
@@ -119,14 +138,21 @@ class DynamoDeploymentClient:
self
.
custom_api
=
client
.
CustomObjectsApi
(
self
.
k8s_client
)
self
.
custom_api
=
client
.
CustomObjectsApi
(
self
.
k8s_client
)
self
.
core_api
=
client
.
CoreV1Api
(
self
.
k8s_client
)
self
.
core_api
=
client
.
CoreV1Api
(
self
.
k8s_client
)
def
port_forward_frontend
(
self
,
local_port
:
int
=
8000
,
quiet
:
bool
=
False
)
->
str
:
def
port_forward_frontend
(
self
,
local_port
:
Optional
[
int
]
=
None
,
quiet
:
bool
=
False
)
->
str
:
"""
"""
Port forward the frontend service to a local port.
Port forward the frontend service to a local port.
Args:
Args:
local_port: Local port to forward to (
default:
8000)
local_port: Local port to forward to (
if None, find first available port starting from
8000)
quiet: If True, suppress kubectl port-forward output messages (default: False)
quiet: If True, suppress kubectl port-forward output messages (default: False)
"""
"""
if
local_port
is
None
:
local_port
=
find_available_port
(
8000
)
if
not
quiet
:
print
(
f
"Using available local port:
{
local_port
}
"
)
cmd
=
[
cmd
=
[
"kubectl"
,
"kubectl"
,
"port-forward"
,
"port-forward"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment