Unverified commit 6be9c9a4, authored by Kyle McGill, committed by GitHub
Browse files

fix: trtllm has moved to version 1.2+ to enable cuda graph testing; fixed path...

fix: trtllm has moved to version 1.2+ to enable cuda graph testing; fixed path to config.yaml (#5133)
parent 5a158552
...@@ -151,9 +151,6 @@ def send_completion_request( ...@@ -151,9 +151,6 @@ def send_completion_request(
@pytest.mark.nightly @pytest.mark.nightly
@pytest.mark.slow @pytest.mark.slow
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.skip(
reason="Enable these tests once `main` dynamo upgrades to TRTLLM 1.2+"
)
def test_kvbm_without_cuda_graph_enabled(request, runtime_services): def test_kvbm_without_cuda_graph_enabled(request, runtime_services):
""" """
End-to-end test for TRTLLM worker with cuda_graph_config not defined and End-to-end test for TRTLLM worker with cuda_graph_config not defined and
...@@ -168,12 +165,12 @@ def test_kvbm_without_cuda_graph_enabled(request, runtime_services): ...@@ -168,12 +165,12 @@ def test_kvbm_without_cuda_graph_enabled(request, runtime_services):
with DynamoFrontendProcess(request): with DynamoFrontendProcess(request):
logger.info("Frontend started.") logger.info("Frontend started.")
engine_config_with_cuda_graph_and_kvbm = ( engine_config_without_cuda_graph_and_kvbm = (
"tests/kvbm/engine_config_without_cuda_graph_and_kvbm.yaml" "tests/kvbm_integration/engine_config_without_cuda_graph_and_kvbm.yaml"
) )
logger.info("Starting worker...") logger.info("Starting worker...")
with DynamoWorkerProcess( with DynamoWorkerProcess(
request, "decode", engine_config_with_cuda_graph_and_kvbm request, "decode", engine_config_without_cuda_graph_and_kvbm
) as worker: ) as worker:
logger.info(f"Worker PID: {worker.get_pid()}") logger.info(f"Worker PID: {worker.get_pid()}")
...@@ -190,9 +187,6 @@ def test_kvbm_without_cuda_graph_enabled(request, runtime_services): ...@@ -190,9 +187,6 @@ def test_kvbm_without_cuda_graph_enabled(request, runtime_services):
@pytest.mark.slow @pytest.mark.slow
@pytest.mark.nightly @pytest.mark.nightly
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.skip(
reason="Enable these tests once dynamo `main` upgrades to TRTLLM 1.2+"
)
def test_kvbm_with_cuda_graph_enabled(request, runtime_services): def test_kvbm_with_cuda_graph_enabled(request, runtime_services):
""" """
End-to-end test for TRTLLM worker with cuda_graph_config defined and End-to-end test for TRTLLM worker with cuda_graph_config defined and
...@@ -208,7 +202,7 @@ def test_kvbm_with_cuda_graph_enabled(request, runtime_services): ...@@ -208,7 +202,7 @@ def test_kvbm_with_cuda_graph_enabled(request, runtime_services):
logger.info("Frontend started.") logger.info("Frontend started.")
engine_config_with_cuda_graph_and_kvbm = ( engine_config_with_cuda_graph_and_kvbm = (
"tests/kvbm/engine_config_with_cuda_graph_and_kvbm.yaml" "tests/kvbm_integration/engine_config_with_cuda_graph_and_kvbm.yaml"
) )
logger.info("Starting worker...") logger.info("Starting worker...")
with DynamoWorkerProcess( with DynamoWorkerProcess(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment