Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d821a8b9
Unverified
Commit
d821a8b9
authored
Nov 21, 2025
by
Yan Ru Pei
Committed by
GitHub
Nov 21, 2025
Browse files
chore: parallelize planner profile tests + bindings test cleanup (#4532)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
65f18884
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
66 additions
and
74 deletions
+66
-74
lib/bindings/python/tests/soak.py
lib/bindings/python/tests/soak.py
+0
-0
lib/bindings/python/tests/test_kv_bindings.py
lib/bindings/python/tests/test_kv_bindings.py
+20
-48
lib/bindings/python/tests/test_tensor.py
lib/bindings/python/tests/test_tensor.py
+3
-2
tests/profiler/test_profile_sla_aiconfigurator.py
tests/profiler/test_profile_sla_aiconfigurator.py
+8
-3
tests/profiler/test_profile_sla_dryrun.py
tests/profiler/test_profile_sla_dryrun.py
+35
-21
No files found.
lib/bindings/python/tests/soak.py
deleted
100644 → 0
View file @
65f18884
lib/bindings/python/tests/test_kv_bindings.py
View file @
d821a8b9
...
@@ -29,10 +29,7 @@ pytestmark = pytest.mark.pre_merge
...
@@ -29,10 +29,7 @@ pytestmark = pytest.mark.pre_merge
@
pytest
.
fixture
@
pytest
.
fixture
async
def
distributed_runtime
():
async
def
distributed_runtime
():
"""Function-scoped runtime fixture for use with @pytest.mark.forked tests.
"""Function-scoped runtime fixture for distributed runtime tests."""
Each test gets its own runtime in a forked process to avoid singleton conflicts.
"""
loop
=
asyncio
.
get_running_loop
()
loop
=
asyncio
.
get_running_loop
()
runtime
=
DistributedRuntime
(
loop
,
"etcd"
,
"nats"
)
runtime
=
DistributedRuntime
(
loop
,
"etcd"
,
"nats"
)
yield
runtime
yield
runtime
...
@@ -40,7 +37,6 @@ async def distributed_runtime():
...
@@ -40,7 +37,6 @@ async def distributed_runtime():
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
forked
async
def
test_radix_tree_binding
(
distributed_runtime
):
async
def
test_radix_tree_binding
(
distributed_runtime
):
"""Test RadixTree binding directly with store event and find matches"""
"""Test RadixTree binding directly with store event and find matches"""
import
json
import
json
...
@@ -107,7 +103,6 @@ async def test_radix_tree_binding(distributed_runtime):
...
@@ -107,7 +103,6 @@ async def test_radix_tree_binding(distributed_runtime):
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
forked
@
pytest
.
mark
.
parametrize
(
"num_threads"
,
[
2
,
3
,
5
,
128
])
@
pytest
.
mark
.
parametrize
(
"num_threads"
,
[
2
,
3
,
5
,
128
])
@
pytest
.
mark
.
parametrize
(
"prepopulate_worker_ids"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"prepopulate_worker_ids"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"expiration_duration_secs"
,
[
None
])
@
pytest
.
mark
.
parametrize
(
"expiration_duration_secs"
,
[
None
])
...
@@ -209,15 +204,7 @@ async def test_radix_tree_thread_safety(
...
@@ -209,15 +204,7 @@ async def test_radix_tree_thread_safety(
),
f
"Expected
{
expected_blocks_after_removal
}
block events after removal, got
{
len
(
blocks_after_removal
)
}
"
),
f
"Expected
{
expected_blocks_after_removal
}
block events after removal, got
{
len
(
blocks_after_removal
)
}
"
# TODO Figure out how to test with different kv_block_size
# Right now I get an error in EventPublisher init when I run this test
# back to back. It occurs when calling dynamo_llm_init and I think is related to the
# OnceCell initializations not being reset.
# The test works individually if I run it with 32, then 11, then 64.
# @pytest.mark.parametrize("kv_block_size", [11, 32, 64])
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
forked
@
pytest
.
mark
.
skip
(
reason
=
"Flakey in CI. Likely race condition going on."
)
async
def
test_event_handler
(
distributed_runtime
):
async
def
test_event_handler
(
distributed_runtime
):
kv_block_size
=
32
kv_block_size
=
32
namespace
=
"kv_test"
namespace
=
"kv_test"
...
@@ -225,7 +212,10 @@ async def test_event_handler(distributed_runtime):
...
@@ -225,7 +212,10 @@ async def test_event_handler(distributed_runtime):
kv_listener
=
distributed_runtime
.
namespace
(
namespace
).
component
(
component
)
kv_listener
=
distributed_runtime
.
namespace
(
namespace
).
component
(
component
)
# publisher
# publisher
worker_id
=
233
# Get actual worker_id from component (KvEventPublisher ignores the passed worker_id and uses component's connection_id)
# Create a dummy endpoint to access connection_id since Component doesn't expose it directly
dummy_endpoint
=
kv_listener
.
endpoint
(
"dummy"
)
worker_id
=
dummy_endpoint
.
connection_id
()
event_publisher
=
EventPublisher
(
kv_listener
,
worker_id
,
kv_block_size
)
event_publisher
=
EventPublisher
(
kv_listener
,
worker_id
,
kv_block_size
)
# indexer
# indexer
...
@@ -237,44 +227,26 @@ async def test_event_handler(distributed_runtime):
...
@@ -237,44 +227,26 @@ async def test_event_handler(distributed_runtime):
assert
not
scores
.
scores
assert
not
scores
.
scores
event_publisher
.
store_event
(
test_token
,
lora_id
)
event_publisher
.
store_event
(
test_token
,
lora_id
)
# wait for the event to be processed as it is sent asynchronously
# Wait for the event to be processed (sent asynchronously)
# Retry loop for CI environments where processing may take longer
await
asyncio
.
sleep
(
0.2
)
scores
=
await
indexer
.
find_matches_for_request
(
test_token
,
lora_id
)
worker_key
=
(
worker_id
,
0
)
# (worker_id, dp_rank)
worker_key
=
(
worker_id
,
0
)
# (worker_id, dp_rank)
for
retry
in
range
(
10
):
# Try up to 10 times
assert
scores
.
scores
,
"No scores found"
await
asyncio
.
sleep
(
0.5
)
# Wait 500ms between retries
assert
worker_key
in
scores
.
scores
,
f
"Worker
{
worker_key
}
not found in scores"
scores
=
await
indexer
.
find_matches_for_request
(
test_token
,
lora_id
)
assert
(
if
(
scores
.
scores
[
worker_key
]
==
1
scores
.
scores
),
f
"Expected score 1, got
{
scores
.
scores
[
worker_key
]
}
"
and
worker_key
in
scores
.
scores
and
scores
.
scores
[
worker_key
]
==
1
# Remove event and verify
):
break
if
retry
==
9
:
# Last iteration
# Provide detailed error message for debugging
assert
scores
.
scores
,
f
"No scores found after
{
(
retry
+
1
)
*
0.5
}
s"
assert
(
worker_key
in
scores
.
scores
),
f
"Worker
{
worker_key
}
not in scores after
{
(
retry
+
1
)
*
0.5
}
s"
assert
(
scores
.
scores
[
worker_key
]
==
1
),
f
"Expected score 1, got
{
scores
.
scores
.
get
(
worker_key
)
}
after
{
(
retry
+
1
)
*
0.5
}
s"
# remove event
event_publisher
.
remove_event
()
event_publisher
.
remove_event
()
# Retry loop for event removal verification
await
asyncio
.
sleep
(
0.2
)
for
retry
in
range
(
10
):
# Try up to 10 times
await
asyncio
.
sleep
(
0.5
)
# Wait 500ms between retries
scores
=
await
indexer
.
find_matches_for_request
(
test_token
,
lora_id
)
scores
=
await
indexer
.
find_matches_for_request
(
test_token
,
lora_id
)
assert
not
scores
.
scores
,
f
"Scores still present:
{
scores
.
scores
}
"
if
not
scores
.
scores
:
break
if
retry
==
9
:
# Last iteration
assert
(
not
scores
.
scores
),
f
"Scores still present after
{
(
retry
+
1
)
*
0.5
}
s:
{
scores
.
scores
}
"
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
forked
async
def
test_approx_kv_indexer
(
distributed_runtime
):
async
def
test_approx_kv_indexer
(
distributed_runtime
):
kv_block_size
=
32
kv_block_size
=
32
namespace
=
"kv_test"
namespace
=
"kv_test"
...
...
lib/bindings/python/tests/test_tensor.py
View file @
d821a8b9
...
@@ -5,15 +5,16 @@
...
@@ -5,15 +5,16 @@
import
os
import
os
import
pytest
import
uvloop
import
uvloop
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_llm
from
dynamo.llm
import
ModelInput
,
ModelRuntimeConfig
,
ModelType
,
register_llm
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
TEST_END_TO_END
=
os
.
environ
.
get
(
"TEST_END_TO_END"
,
0
)
TEST_END_TO_END
=
os
.
environ
.
get
(
"TEST_END_TO_END"
,
0
)
@
dynamo_worker
()
@
pytest
.
mark
.
asyncio
async
def
test_register
(
runtime
:
DistributedRuntime
):
async
def
test_register
(
runtime
:
DistributedRuntime
):
component
=
runtime
.
namespace
(
"test"
).
component
(
"tensor"
)
component
=
runtime
.
namespace
(
"test"
).
component
(
"tensor"
)
...
...
tests/profiler/test_profile_sla_aiconfigurator.py
View file @
d821a8b9
...
@@ -37,15 +37,16 @@ class TestProfileSlaAiconfigurator:
...
@@ -37,15 +37,16 @@ class TestProfileSlaAiconfigurator:
"""Test class for profile_sla aiconfigurator functionality."""
"""Test class for profile_sla aiconfigurator functionality."""
@
pytest
.
fixture
@
pytest
.
fixture
def
trtllm_args
(
self
):
def
trtllm_args
(
self
,
request
):
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
model
=
""
self
.
model
=
""
self
.
dgd_image
=
""
self
.
dgd_image
=
""
self
.
backend
=
"trtllm"
self
.
backend
=
"trtllm"
self
.
config
=
"examples/backends/trtllm/deploy/disagg.yaml"
self
.
config
=
"examples/backends/trtllm/deploy/disagg.yaml"
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
min_num_gpus_per_engine
=
1
self
.
min_num_gpus_per_engine
=
1
self
.
max_num_gpus_per_engine
=
8
self
.
max_num_gpus_per_engine
=
8
self
.
skip_existing_results
=
False
self
.
skip_existing_results
=
False
...
@@ -76,6 +77,7 @@ class TestProfileSlaAiconfigurator:
...
@@ -76,6 +77,7 @@ class TestProfileSlaAiconfigurator:
return
Args
()
return
Args
()
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"missing_arg"
,
[
"aic_system"
,
"aic_hf_id"
])
@
pytest
.
mark
.
parametrize
(
"missing_arg"
,
[
"aic_system"
,
"aic_hf_id"
])
async
def
test_aiconfigurator_missing_args
(
self
,
trtllm_args
,
missing_arg
):
async
def
test_aiconfigurator_missing_args
(
self
,
trtllm_args
,
missing_arg
):
...
@@ -86,6 +88,7 @@ class TestProfileSlaAiconfigurator:
...
@@ -86,6 +88,7 @@ class TestProfileSlaAiconfigurator:
await
run_profile
(
trtllm_args
)
await
run_profile
(
trtllm_args
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"arg_name, bad_value"
,
"arg_name, bad_value"
,
...
@@ -103,11 +106,13 @@ class TestProfileSlaAiconfigurator:
...
@@ -103,11 +106,13 @@ class TestProfileSlaAiconfigurator:
await
run_profile
(
trtllm_args
)
await
run_profile
(
trtllm_args
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_trtllm_aiconfigurator_single_model
(
self
,
trtllm_args
):
async
def
test_trtllm_aiconfigurator_single_model
(
self
,
trtllm_args
):
# Test that profile_sla works with the model & backend in the trtllm_args fixture.
# Test that profile_sla works with the model & backend in the trtllm_args fixture.
await
run_profile
(
trtllm_args
)
await
run_profile
(
trtllm_args
)
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"backend, aic_backend_version"
,
"backend, aic_backend_version"
,
...
...
tests/profiler/test_profile_sla_dryrun.py
View file @
d821a8b9
...
@@ -41,15 +41,16 @@ class TestProfileSLADryRun:
...
@@ -41,15 +41,16 @@ class TestProfileSLADryRun:
"""Test class for profile_sla dry-run functionality."""
"""Test class for profile_sla dry-run functionality."""
@
pytest
.
fixture
@
pytest
.
fixture
def
vllm_args
(
self
):
def
vllm_args
(
self
,
request
):
"""Create arguments for vllm backend dry-run test."""
"""Create arguments for vllm backend dry-run test."""
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
backend
=
"vllm"
self
.
backend
=
"vllm"
self
.
config
=
"examples/backends/vllm/deploy/disagg.yaml"
self
.
config
=
"examples/backends/vllm/deploy/disagg.yaml"
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
model
=
""
self
.
model
=
""
self
.
dgd_image
=
""
self
.
dgd_image
=
""
self
.
min_num_gpus_per_engine
=
1
self
.
min_num_gpus_per_engine
=
1
...
@@ -83,15 +84,16 @@ class TestProfileSLADryRun:
...
@@ -83,15 +84,16 @@ class TestProfileSLADryRun:
return
Args
()
return
Args
()
@
pytest
.
fixture
@
pytest
.
fixture
def
sglang_args
(
self
):
def
sglang_args
(
self
,
request
):
"""Create arguments for sglang backend dry-run test."""
"""Create arguments for sglang backend dry-run test."""
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
backend
=
"sglang"
self
.
backend
=
"sglang"
self
.
config
=
"examples/backends/sglang/deploy/disagg.yaml"
self
.
config
=
"examples/backends/sglang/deploy/disagg.yaml"
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
model
=
""
self
.
model
=
""
self
.
dgd_image
=
""
self
.
dgd_image
=
""
self
.
min_num_gpus_per_engine
=
1
self
.
min_num_gpus_per_engine
=
1
...
@@ -124,6 +126,7 @@ class TestProfileSLADryRun:
...
@@ -124,6 +126,7 @@ class TestProfileSLADryRun:
return
Args
()
return
Args
()
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_vllm_dryrun
(
self
,
vllm_args
):
async
def
test_vllm_dryrun
(
self
,
vllm_args
):
"""Test that profile_sla dry-run works for vllm backend with disagg.yaml config."""
"""Test that profile_sla dry-run works for vllm backend with disagg.yaml config."""
...
@@ -131,6 +134,7 @@ class TestProfileSLADryRun:
...
@@ -131,6 +134,7 @@ class TestProfileSLADryRun:
await
run_profile
(
vllm_args
)
await
run_profile
(
vllm_args
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_sglang_dryrun
(
self
,
sglang_args
):
async
def
test_sglang_dryrun
(
self
,
sglang_args
):
"""Test that profile_sla dry-run works for sglang backend with disagg.yaml config."""
"""Test that profile_sla dry-run works for sglang backend with disagg.yaml config."""
...
@@ -138,15 +142,16 @@ class TestProfileSLADryRun:
...
@@ -138,15 +142,16 @@ class TestProfileSLADryRun:
await
run_profile
(
sglang_args
)
await
run_profile
(
sglang_args
)
@
pytest
.
fixture
@
pytest
.
fixture
def
trtllm_args
(
self
):
def
trtllm_args
(
self
,
request
):
"""Create arguments for trtllm backend dry-run test."""
"""Create arguments for trtllm backend dry-run test."""
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
backend
=
"trtllm"
self
.
backend
=
"trtllm"
self
.
config
=
"examples/backends/trtllm/deploy/disagg.yaml"
self
.
config
=
"examples/backends/trtllm/deploy/disagg.yaml"
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
model
=
""
self
.
model
=
""
self
.
dgd_image
=
""
self
.
dgd_image
=
""
self
.
min_num_gpus_per_engine
=
1
self
.
min_num_gpus_per_engine
=
1
...
@@ -179,6 +184,7 @@ class TestProfileSLADryRun:
...
@@ -179,6 +184,7 @@ class TestProfileSLADryRun:
return
Args
()
return
Args
()
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_trtllm_dryrun
(
self
,
trtllm_args
):
async
def
test_trtllm_dryrun
(
self
,
trtllm_args
):
"""Test that profile_sla dry-run works for trtllm backend with disagg.yaml config."""
"""Test that profile_sla dry-run works for trtllm backend with disagg.yaml config."""
...
@@ -186,15 +192,16 @@ class TestProfileSLADryRun:
...
@@ -186,15 +192,16 @@ class TestProfileSLADryRun:
await
run_profile
(
trtllm_args
)
await
run_profile
(
trtllm_args
)
@
pytest
.
fixture
@
pytest
.
fixture
def
sglang_moe_args
(
self
):
def
sglang_moe_args
(
self
,
request
):
"""Create arguments for trtllm backend dry-run test."""
"""Create arguments for trtllm backend dry-run test."""
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
backend
=
"sglang"
self
.
backend
=
"sglang"
self
.
config
=
"recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml"
self
.
config
=
"recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml"
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
model
=
""
self
.
model
=
""
self
.
dgd_image
=
""
self
.
dgd_image
=
""
self
.
min_num_gpus_per_engine
=
8
self
.
min_num_gpus_per_engine
=
8
...
@@ -228,6 +235,7 @@ class TestProfileSLADryRun:
...
@@ -228,6 +235,7 @@ class TestProfileSLADryRun:
return
Args
()
return
Args
()
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_sglang_moe_dryrun
(
self
,
sglang_moe_args
):
async
def
test_sglang_moe_dryrun
(
self
,
sglang_moe_args
):
"""Test that profile_sla dry-run works for sglang backend with MoE config."""
"""Test that profile_sla dry-run works for sglang backend with MoE config."""
...
@@ -255,15 +263,16 @@ class TestProfileSLADryRun:
...
@@ -255,15 +263,16 @@ class TestProfileSLADryRun:
)
)
@
pytest
.
fixture
@
pytest
.
fixture
def
vllm_args_with_model_autogen
(
self
):
def
vllm_args_with_model_autogen
(
self
,
request
):
"""Create arguments for vllm backend with model-based search space autogeneration."""
"""Create arguments for vllm backend with model-based search space autogeneration."""
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
backend
=
"vllm"
self
.
backend
=
"vllm"
self
.
config
=
""
self
.
config
=
""
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Specify model for autogen
self
.
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Specify model for autogen
self
.
dgd_image
=
""
self
.
dgd_image
=
""
# Set to 0 to trigger auto-generation path
# Set to 0 to trigger auto-generation path
...
@@ -293,6 +302,7 @@ class TestProfileSLADryRun:
...
@@ -293,6 +302,7 @@ class TestProfileSLADryRun:
return
Args
()
return
Args
()
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_gpu_summary"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_gpu_summary"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_model_info"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_model_info"
)
...
@@ -319,15 +329,16 @@ class TestProfileSLADryRun:
...
@@ -319,15 +329,16 @@ class TestProfileSLADryRun:
await
run_profile
(
vllm_args_with_model_autogen
)
await
run_profile
(
vllm_args_with_model_autogen
)
@
pytest
.
fixture
@
pytest
.
fixture
def
sglang_args_with_model_autogen
(
self
):
def
sglang_args_with_model_autogen
(
self
,
request
):
"""Create arguments for sglang backend with model-based search space autogeneration."""
"""Create arguments for sglang backend with model-based search space autogeneration."""
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
backend
=
"sglang"
self
.
backend
=
"sglang"
self
.
config
=
""
self
.
config
=
""
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Specify model for autogen
self
.
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Specify model for autogen
self
.
dgd_image
=
""
self
.
dgd_image
=
""
self
.
min_num_gpus_per_engine
=
0
self
.
min_num_gpus_per_engine
=
0
...
@@ -355,6 +366,7 @@ class TestProfileSLADryRun:
...
@@ -355,6 +366,7 @@ class TestProfileSLADryRun:
return
Args
()
return
Args
()
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_gpu_summary"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_gpu_summary"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_model_info"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_model_info"
)
...
@@ -381,15 +393,16 @@ class TestProfileSLADryRun:
...
@@ -381,15 +393,16 @@ class TestProfileSLADryRun:
await
run_profile
(
sglang_args_with_model_autogen
)
await
run_profile
(
sglang_args_with_model_autogen
)
@
pytest
.
fixture
@
pytest
.
fixture
def
trtllm_args_with_model_autogen
(
self
):
def
trtllm_args_with_model_autogen
(
self
,
request
):
"""Create arguments for trtllm backend with model-based search space autogeneration."""
"""Create arguments for trtllm backend with model-based search space autogeneration."""
class
Args
:
class
Args
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
backend
=
"trtllm"
self
.
backend
=
"trtllm"
self
.
config
=
""
self
.
config
=
""
self
.
output_dir
=
"/tmp/test_profiling_results"
# Use unique output directory per test for parallel execution
self
.
namespace
=
"test-namespace"
self
.
output_dir
=
f
"/tmp/test_profiling_results_
{
request
.
node
.
name
}
"
self
.
namespace
=
f
"test-namespace-
{
request
.
node
.
name
}
"
self
.
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Specify model for autogen
self
.
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Specify model for autogen
self
.
dgd_image
=
""
self
.
dgd_image
=
""
self
.
min_num_gpus_per_engine
=
0
self
.
min_num_gpus_per_engine
=
0
...
@@ -417,6 +430,7 @@ class TestProfileSLADryRun:
...
@@ -417,6 +430,7 @@ class TestProfileSLADryRun:
return
Args
()
return
Args
()
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_gpu_summary"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_gpu_summary"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_model_info"
)
@
patch
(
"benchmarks.profiler.utils.search_space_autogen.get_model_info"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment