Unverified commit 2f2a13a3, authored by Jason Zhou, committed by GitHub
Browse files

test: add AIConfigurator dense model tests for Dynamo Planner Profiler (#4526)


Signed-off-by: Jason Zhou <jasonzho@nvidia.com>
parent c6555852
...@@ -77,6 +77,9 @@ llm_engine.h ...@@ -77,6 +77,9 @@ llm_engine.h
### Ruff ### ### Ruff ###
.ruff_cache/ .ruff_cache/
### MyPy ###
.mypy_cache/
### Python ### ### Python ###
__pycache__/ __pycache__/
*.py[cod] *.py[cod]
......
...@@ -123,7 +123,8 @@ class VllmV1ConfigModifier: ...@@ -123,7 +123,8 @@ class VllmV1ConfigModifier:
args = break_arguments(args) args = break_arguments(args)
# remove --is-prefill-worker flag # remove --is-prefill-worker flag
args.remove("--is-prefill-worker") if "--is-prefill-worker" in args:
args.remove("--is-prefill-worker")
# disable prefix caching # disable prefix caching
if "--enable-prefix-caching" in args: if "--enable-prefix-caching" in args:
......
...@@ -40,7 +40,7 @@ classifiers = [ ...@@ -40,7 +40,7 @@ classifiers = [
] ]
dependencies = [ dependencies = [
"aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759", "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@release/0.4.0",
"networkx", "networkx",
"pandas", "pandas",
"pydantic>=2", "pydantic>=2",
......
...@@ -37,7 +37,7 @@ class TestProfileSlaAiconfigurator: ...@@ -37,7 +37,7 @@ class TestProfileSlaAiconfigurator:
"""Test class for profile_sla aiconfigurator functionality.""" """Test class for profile_sla aiconfigurator functionality."""
@pytest.fixture @pytest.fixture
def trtllm_args(self, request): def llm_args(self, request):
class Args: class Args:
def __init__(self): def __init__(self):
self.model = "" self.model = ""
...@@ -82,12 +82,12 @@ class TestProfileSlaAiconfigurator: ...@@ -82,12 +82,12 @@ class TestProfileSlaAiconfigurator:
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"]) @pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg): async def test_aiconfigurator_missing_args(self, llm_args, missing_arg):
# Check that validation error happens when a required arg is missing. # Check that validation error happens when a required arg is missing.
# Note: aic_backend_version is optional - when None, auto-detects latest version # Note: aic_backend_version is optional - when None, auto-detects latest version
setattr(trtllm_args, missing_arg, None) setattr(llm_args, missing_arg, None)
with pytest.raises(ValueError): with pytest.raises(ValueError):
await run_profile(trtllm_args) await run_profile(llm_args)
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0 @pytest.mark.gpu_0
...@@ -102,21 +102,21 @@ class TestProfileSlaAiconfigurator: ...@@ -102,21 +102,21 @@ class TestProfileSlaAiconfigurator:
("aic_backend_version", "0.1.0"), ("aic_backend_version", "0.1.0"),
], ],
) )
async def test_aiconfiguator_no_data(self, trtllm_args, arg_name, bad_value): async def test_aiconfigurator_no_data(self, llm_args, arg_name, bad_value):
# Check that an appropriate error is raised when the system/model/backend # Check that an appropriate error is raised when the system/model/backend
# is not found in the aiconfigurator database. # is not found in the aiconfigurator database.
setattr(trtllm_args, arg_name, bad_value) setattr(llm_args, arg_name, bad_value)
with pytest.raises(ValueError, match="Database not found"): with pytest.raises(ValueError, match="Database not found"):
await run_profile(trtllm_args) await run_profile(llm_args)
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.performance @pytest.mark.performance
async def test_trtllm_aiconfigurator_single_model(self, trtllm_args): async def test_trtllm_aiconfigurator_single_model(self, llm_args):
# Test that profile_sla works with the model & backend in the trtllm_args fixture. # Test that profile_sla works with the model & backend in the llm_args fixture.
await run_profile(trtllm_args) await run_profile(llm_args)
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
...@@ -129,6 +129,10 @@ class TestProfileSlaAiconfigurator: ...@@ -129,6 +129,10 @@ class TestProfileSlaAiconfigurator:
("trtllm", None), ("trtllm", None),
("trtllm", "0.20.0"), ("trtllm", "0.20.0"),
("trtllm", "1.0.0rc3"), ("trtllm", "1.0.0rc3"),
("vllm", None),
("vllm", "0.11.0"),
("sglang", None),
("sglang", "0.5.1.post1"),
], ],
) )
@pytest.mark.parametrize( @pytest.mark.parametrize(
...@@ -138,11 +142,11 @@ class TestProfileSlaAiconfigurator: ...@@ -138,11 +142,11 @@ class TestProfileSlaAiconfigurator:
"meta-llama/Llama-3.1-405B", "meta-llama/Llama-3.1-405B",
], ],
) )
async def test_trtllm_aiconfigurator_many( async def test_aiconfigurator_dense_models(
self, trtllm_args, hf_model_id, backend, aic_backend_version self, llm_args, hf_model_id, backend, aic_backend_version
): ):
# Test that profile_sla works with a variety of backend versions and model names. # Test that profile_sla works with a variety of backend versions and model names.
trtllm_args.aic_hf_id = hf_model_id llm_args.aic_hf_id = hf_model_id
trtllm_args.backend = backend llm_args.backend = backend
trtllm_args.aic_backend_version = aic_backend_version llm_args.aic_backend_version = aic_backend_version
await run_profile(trtllm_args) await run_profile(llm_args)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment