health_check_ports=[port,6880],# vLLM server + KVBM metrics
timeout=timeout,
display_output=True,
terminate_existing=True,
stragglers=["vllm"],
straggler_commands=["vllm serve"],
log_dir=log_dir,
)asproc:
# Give KVBM connector extra time to fully initialize
print("Waiting 5 seconds for KVBM connector to fully initialize...")
time.sleep(5)
# Create wrapper object for compatibility with existing test code
class ServerWrapper:
    """Wrapper to maintain compatibility with LLMServerManager interface.

    The constructor takes no arguments: every attribute is copied from a
    name in the enclosing scope (``port``, ``server_type``, ``cpu_blocks``,
    ``gpu_blocks`` and ``proc``) at the moment the wrapper is instantiated.
    """

    def __init__(self):
        # Server address on the local host, built from the enclosing ``port``.
        self.base_url = f"http://localhost:{port}"
        self.server_type = server_type
        self.cpu_cache_blocks = cpu_blocks
        self.gpu_cache_blocks = gpu_blocks
        self.port = port
        # Object bound by the surrounding ``with ... as proc`` statement.
        self.proc = proc
yieldServerWrapper()
classTestDeterminism:
classTestDeterminism:
"""Test class for determinism validation."""
"""Test class for determinism validation."""
...
@@ -684,3 +786,94 @@ class TestDeterminism:
...
@@ -684,3 +786,94 @@ class TestDeterminism:
assert(
assert(
success_rate>=success_rate_threshold
success_rate>=success_rate_threshold
),f"Model is not deterministic across cache reset: {total_failed} comparisons failed, success rate {success_rate:.1%} lower than expected {success_rate_threshold*100}%"
),f"Model is not deterministic across cache reset: {total_failed} comparisons failed, success rate {success_rate:.1%} lower than expected {success_rate_threshold*100}%"