"vscode:/vscode.git/clone" did not exist on "268d017e24c145a514fd267fa976b6e55a01bc44"
Unverified Commit c0eaed48 authored by jthomson04, committed by GitHub
Browse files

fix: Fix KVBM Guide (#2539)


Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
parent 86a4a58e
@@ -30,13 +30,10 @@ To use KVBM in vLLM, you can follow the steps below:
 docker compose -f deploy/metrics/docker-compose.yml up -d
 # build a container containing vllm and kvbm
-./container/build.sh --framework kvbm
+./container/build.sh --framework vllm --enable-kvbm
 # launch the container
-./container/run.sh --framework kvbm -it --mount-workspace --use-nixl-gds
+./container/run.sh --framework vllm -it --mount-workspace --use-nixl-gds
-# enable using kvbm instead of vllm's own kv cache manager
-export DYN_KVBM_MANAGER=kvbm
 # enable kv offloading to CPU memory
 # 4 means 4GB of CPU memory would be used
@@ -47,7 +44,7 @@ export DYN_KVBM_CPU_CACHE_GB=4
 export DYN_KVBM_DISK_CACHE_GB=8
 # serve an example LLM model
-vllm serve deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+vllm serve --kv-transfer-config '{"kv_connector":"DynamoConnector","kv_role":"kv_both", "kv_connector_module_path": "dynamo.llm.vllm_integration.connector"}' deepseek-ai/DeepSeek-R1-Distill-Llama-8B
 # make a call to LLM
 curl localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment