Unverified commit f208208b, authored by ishandhanani and committed by GitHub
Browse files

fix(sglang): disable piecewise CUDA graph in launch scripts (#8609)

parent c5a60a04
...@@ -85,6 +85,7 @@ python3 -m "$WORKER_MODULE" \ ...@@ -85,6 +85,7 @@ python3 -m "$WORKER_MODULE" \
--trust-remote-code \ --trust-remote-code \
--skip-tokenizer-init \ --skip-tokenizer-init \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \ $GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" \ "${TRACE_ARGS[@]}" \
"${EXTRA_ARGS[@]}" & "${EXTRA_ARGS[@]}" &
......
...@@ -86,6 +86,7 @@ python3 -m dynamo.sglang \ ...@@ -86,6 +86,7 @@ python3 -m dynamo.sglang \
--trust-remote-code \ --trust-remote-code \
"${KV_EVENTS_ARGS_1[@]}" \ "${KV_EVENTS_ARGS_1[@]}" \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \ $GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
...@@ -98,6 +99,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ ...@@ -98,6 +99,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--trust-remote-code \ --trust-remote-code \
"${KV_EVENTS_ARGS_2[@]}" \ "${KV_EVENTS_ARGS_2[@]}" \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \ $GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
......
...@@ -81,6 +81,7 @@ python3 -m dynamo.sglang \ ...@@ -81,6 +81,7 @@ python3 -m dynamo.sglang \
--port 40000 \ --port 40000 \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \ $GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
...@@ -97,6 +98,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ ...@@ -97,6 +98,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--host 0.0.0.0 \ --host 0.0.0.0 \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
$GPU_MEM_ARGS \ $GPU_MEM_ARGS \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
......
...@@ -76,6 +76,7 @@ python3 -m dynamo.sglang \ ...@@ -76,6 +76,7 @@ python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
# run prefill worker # run prefill worker
...@@ -91,6 +92,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ ...@@ -91,6 +92,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
# run decode worker # run decode worker
...@@ -106,6 +108,7 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \ ...@@ -106,6 +108,7 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}' \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
# run decode worker # run decode worker
...@@ -121,6 +124,7 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \ ...@@ -121,6 +124,7 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}' \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--enable-metrics \ --enable-metrics \
--disable-piecewise-cuda-graph \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
# Wait for any worker to exit (keeps script running) # Wait for any worker to exit (keeps script running)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment