Unverified Commit 7dea77c3 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: add info about cache flush and bump version (#1839)

parent a604c7f0
...@@ -89,9 +89,9 @@ RUN pip install --break-system-packages "sglang==0.4.8.post1" ...@@ -89,9 +89,9 @@ RUN pip install --break-system-packages "sglang==0.4.8.post1"
ENV SGL_FORCE_SHUTDOWN=1 ENV SGL_FORCE_SHUTDOWN=1
WORKDIR /sgl-workspace WORKDIR /sgl-workspace
# support batch completions for SGL benchmarking # include flush cache endpoint and server support
# https://github.com/ai-dynamo/dynamo/pull/1721 # https://github.com/ai-dynamo/dynamo/pull/1769
ARG DYNAMO_COMMIT="9cbf803172f8ed4b3342c9d5237b49cb07d4d95c" ARG DYNAMO_COMMIT="bd91dca6141e05bcfbe9bd4dea54cc58b9e37d75"
RUN git clone https://github.com/ai-dynamo/dynamo.git && cd dynamo && git checkout ${DYNAMO_COMMIT} RUN git clone https://github.com/ai-dynamo/dynamo.git && cd dynamo && git checkout ${DYNAMO_COMMIT}
# install dynamo in editable mode # install dynamo in editable mode
......
...@@ -72,6 +72,8 @@ In each container, you should be in the `/sgl-workspace/dynamo/examples/sglang` ...@@ -72,6 +72,8 @@ In each container, you should be in the `/sgl-workspace/dynamo/examples/sglang`
```bash ```bash
# run ingress # run ingress
dynamo run in=http out=dyn & dynamo run in=http out=dyn &
# optionally run the http server that allows you to flush the kv cache for all workers (see benchmarking section below)
python3 utils/sgl_http_server.py --ns dynamo &
# run prefill worker # run prefill worker
python3 components/worker.py \ python3 components/worker.py \
--model-path /model/ \ --model-path /model/ \
...@@ -170,6 +172,8 @@ Example usage: ...@@ -170,6 +172,8 @@ Example usage:
```bash ```bash
# warmup # warmup
./utils/bench.sh HEAD_PREFILL_NODE_IP --type warmup ./utils/bench.sh HEAD_PREFILL_NODE_IP --type warmup
# if you ran the http server on the head prefill node, you can optionally flush the kv cache for all workers (similar to SGLang's benchmarking script)
curl -X POST http://${HEAD_PREFILL_NODE_IP}:9001/flush_cache
# run benchmark # run benchmark
./utils/bench.sh HEAD_PREFILL_NODE_IP --type e2e ./utils/bench.sh HEAD_PREFILL_NODE_IP --type e2e
``` ```
...@@ -181,6 +185,8 @@ Example usage: ...@@ -181,6 +185,8 @@ Example usage:
```bash ```bash
# generate data # generate data
python3 utils/generate_bench_data.py --output data.jsonl --num-prompts 8192 --input-len 4096 --output-len 5 --model deepseek-ai/DeepSeek-R1 python3 utils/generate_bench_data.py --output data.jsonl --num-prompts 8192 --input-len 4096 --output-len 5 --model deepseek-ai/DeepSeek-R1
# if you ran the http server on the head prefill node, you can optionally flush the kv cache for all workers (similar to SGLang's benchmarking script)
curl -X POST http://${HEAD_PREFILL_NODE_IP}:9001/flush_cache
# run benchmark # run benchmark
./utils/bench.sh HEAD_PREFILL_NODE_IP --type custom_completions ./utils/bench.sh HEAD_PREFILL_NODE_IP --type custom_completions
``` ```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment