build: enable kvbm in trtllm container (#2956)

Signed-off-by: Anant Sharma <anants@nvidia.com>

build: enable kvbm in trtllm container (#2956)
Signed-off-by: Anant Sharma <anants@nvidia.com>
70f99382 · Anant Sharma · GitHub · 2ae20102 · 70f99382 · 70f99382
Unverified commit 70f99382, authored Sep 24, 2025 by Anant Sharma; committed by GitHub on Sep 24, 2025
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

container/build.sh container/build.sh +2 -2

tests/kvbm/test_determinism.py tests/kvbm/test_determinism.py +1 -1

No files found.
--- a/container/build.sh
+++ b/container/build.sh
@@ -693,8 +693,8 @@ if [  ! -z ${RELEASE_BUILD} ]; then
    BUILD_ARGS+=" --build-arg RELEASE_BUILD=${RELEASE_BUILD} "
 fi

-if [[ $FRAMEWORK == "VLLM" ]]; then
-    echo "Forcing enable_kvbm to true in vLLM image build"
+if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]]; then
+    echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build"
    ENABLE_KVBM=true
 fi


--- a/tests/kvbm/test_determinism.py
+++ b/tests/kvbm/test_determinism.py
@@ -775,7 +775,6 @@ class TestDeterminism:
        ],
        indirect=True,
    )
-    @pytest.mark.vllm
    def test_determinism_with_cache_reset(self, tester, llm_server, runtime_services):
        """Test determinism across cache reset: run test with warmup, reset cache, run again without warmup."""
        print("\n" + "=" * 70)
@@ -916,6 +915,7 @@ class TestDeterminism:
        "num_prompts",
        [int(x) for x in os.environ.get("KVBM_IFEVAL_PROMPTS", "120").split(",")],
    )
+    @pytest.mark.skip(reason="Flaky test: DIS-665")
    def test_concurrent_determinism_with_ifeval(
        self,
        tester,