Unverified commit 2f2a13a3, authored by Jason Zhou, committed by GitHub
Browse files

test: add AIConfigurator dense model tests for Dynamo Planner Profiler (#4526)


Signed-off-by: Jason Zhou <jasonzho@nvidia.com>
parent c6555852
......@@ -77,6 +77,9 @@ llm_engine.h
### Ruff ###
.ruff_cache/
### MyPy ###
.mypy_cache/
### Python ###
__pycache__/
*.py[cod]
......
......@@ -123,7 +123,8 @@ class VllmV1ConfigModifier:
args = break_arguments(args)
# remove --is-prefill-worker flag
args.remove("--is-prefill-worker")
if "--is-prefill-worker" in args:
args.remove("--is-prefill-worker")
# disable prefix caching
if "--enable-prefix-caching" in args:
......
......@@ -40,7 +40,7 @@ classifiers = [
]
dependencies = [
"aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
"aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@release/0.4.0",
"networkx",
"pandas",
"pydantic>=2",
......
......@@ -37,7 +37,7 @@ class TestProfileSlaAiconfigurator:
"""Test class for profile_sla aiconfigurator functionality."""
@pytest.fixture
def trtllm_args(self, request):
def llm_args(self, request):
class Args:
def __init__(self):
self.model = ""
......@@ -82,12 +82,12 @@ class TestProfileSlaAiconfigurator:
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
async def test_aiconfigurator_missing_args(self, llm_args, missing_arg):
# Check that validation error happens when a required arg is missing.
# Note: aic_backend_version is optional - when None, auto-detects latest version
setattr(trtllm_args, missing_arg, None)
setattr(llm_args, missing_arg, None)
with pytest.raises(ValueError):
await run_profile(trtllm_args)
await run_profile(llm_args)
@pytest.mark.pre_merge
@pytest.mark.gpu_0
......@@ -102,21 +102,21 @@ class TestProfileSlaAiconfigurator:
("aic_backend_version", "0.1.0"),
],
)
async def test_aiconfiguator_no_data(self, trtllm_args, arg_name, bad_value):
async def test_aiconfigurator_no_data(self, llm_args, arg_name, bad_value):
# Check that an appropriate error is raised when the system/model/backend
# is not found in the aiconfigurator database.
setattr(trtllm_args, arg_name, bad_value)
setattr(llm_args, arg_name, bad_value)
with pytest.raises(ValueError, match="Database not found"):
await run_profile(trtllm_args)
await run_profile(llm_args)
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_1
@pytest.mark.performance
async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
# Test that profile_sla works with the model & backend in the trtllm_args fixture.
await run_profile(trtllm_args)
async def test_trtllm_aiconfigurator_single_model(self, llm_args):
# Test that profile_sla works with the model & backend in the llm_args fixture.
await run_profile(llm_args)
@pytest.mark.parallel
@pytest.mark.asyncio
......@@ -129,6 +129,10 @@ class TestProfileSlaAiconfigurator:
("trtllm", None),
("trtllm", "0.20.0"),
("trtllm", "1.0.0rc3"),
("vllm", None),
("vllm", "0.11.0"),
("sglang", None),
("sglang", "0.5.1.post1"),
],
)
@pytest.mark.parametrize(
......@@ -138,11 +142,11 @@ class TestProfileSlaAiconfigurator:
"meta-llama/Llama-3.1-405B",
],
)
async def test_trtllm_aiconfigurator_many(
self, trtllm_args, hf_model_id, backend, aic_backend_version
async def test_aiconfigurator_dense_models(
self, llm_args, hf_model_id, backend, aic_backend_version
):
# Test that profile_sla works with a variety of backend versions and model names.
trtllm_args.aic_hf_id = hf_model_id
trtllm_args.backend = backend
trtllm_args.aic_backend_version = aic_backend_version
await run_profile(trtllm_args)
llm_args.aic_hf_id = hf_model_id
llm_args.backend = backend
llm_args.aic_backend_version = aic_backend_version
await run_profile(llm_args)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment