Unverified Commit 95214e8b authored by Kris Hung's avatar Kris Hung Committed by GitHub
Browse files

chore: Improve KVBM test handling and update gitignore (#3987)

parent 6bec8b31
...@@ -38,6 +38,10 @@ CMakeCache.txt ...@@ -38,6 +38,10 @@ CMakeCache.txt
*pytest_report.md *pytest_report.md
*pytest_report.xml *pytest_report.xml
# Test artifacts and output directories
*.shakespeare.txt
test_determinism_*/
**/__pycache__ **/__pycache__
**/venv **/venv
**/.venv **/.venv
......
...@@ -55,7 +55,7 @@ export DYN_KVBM_CPU_CACHE_GB=4 ...@@ -55,7 +55,7 @@ export DYN_KVBM_CPU_CACHE_GB=4
export DYN_KVBM_DISK_CACHE_GB=8 export DYN_KVBM_DISK_CACHE_GB=8
# [Experimental] Option 3: Disk cache only (GPU -> Disk direct offloading, bypassing CPU) # [Experimental] Option 3: Disk cache only (GPU -> Disk direct offloading, bypassing CPU)
# NOTE: this option is only experimental and it might give out the best performance. # NOTE: this option is only experimental and it might not give out the best performance.
# NOTE: disk offload filtering is not supported when using this option. # NOTE: disk offload filtering is not supported when using this option.
export DYN_KVBM_DISK_CACHE_GB=8 export DYN_KVBM_DISK_CACHE_GB=8
......
...@@ -69,7 +69,7 @@ cd $DYNAMO_HOME/components/backends/vllm ...@@ -69,7 +69,7 @@ cd $DYNAMO_HOME/components/backends/vllm
> export DYN_KVBM_DISK_CACHE_GB=8 > export DYN_KVBM_DISK_CACHE_GB=8
> >
> # [Experimental] Option 3: Disk cache only (GPU -> Disk direct offloading, bypassing CPU) > # [Experimental] Option 3: Disk cache only (GPU -> Disk direct offloading, bypassing CPU)
> # NOTE: this option is only experimental and it might give out the best performance. > # NOTE: this option is only experimental and it might not give out the best performance.
> # NOTE: disk offload filtering is not supported when using this option. > # NOTE: disk offload filtering is not supported when using this option.
> export DYN_KVBM_DISK_CACHE_GB=8 > export DYN_KVBM_DISK_CACHE_GB=8
> ``` > ```
......
...@@ -409,7 +409,7 @@ def llm_server(request, runtime_services): ...@@ -409,7 +409,7 @@ def llm_server(request, runtime_services):
if importlib.util.find_spec("vllm") is not None: if importlib.util.find_spec("vllm") is not None:
server_type = ServerType.vllm server_type = ServerType.vllm
else: else:
raise Exception("vllm module is not available in the current environment.") pytest.skip("vllm module is not available in the current environment.")
server_manager = LLMServerManager( server_manager = LLMServerManager(
port=port, port=port,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment