"vllm/vscode:/vscode.git/clone" did not exist on "2abd97592f947c041ba70329532f0cf62dd8971f"
Unverified commit f208208b, authored by ishandhanani, committed by GitHub
Browse files

fix(sglang): disable piecewise CUDA graph in launch scripts (#8609)

parent c5a60a04
......@@ -85,6 +85,7 @@ python3 -m "$WORKER_MODULE" \
--trust-remote-code \
--skip-tokenizer-init \
--enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" \
"${EXTRA_ARGS[@]}" &
......
......@@ -86,6 +86,7 @@ python3 -m dynamo.sglang \
--trust-remote-code \
"${KV_EVENTS_ARGS_1[@]}" \
--enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" &
......@@ -98,6 +99,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--trust-remote-code \
"${KV_EVENTS_ARGS_2[@]}" \
--enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" &
......
......@@ -81,6 +81,7 @@ python3 -m dynamo.sglang \
--port 40000 \
--disaggregation-transfer-backend nixl \
--enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" &
......@@ -97,6 +98,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--host 0.0.0.0 \
--disaggregation-transfer-backend nixl \
--enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" &
......
......@@ -76,6 +76,7 @@ python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" &
# run prefill worker
......@@ -91,6 +92,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" &
# run decode worker
......@@ -106,6 +108,7 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" &
# run decode worker
......@@ -121,6 +124,7 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" &
# Wait for any worker to exit (keeps script running)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment