Unverified Commit 880db75d authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

fix: shell-quote Ray leader args (#6693)


Signed-off-by: default avatarJulien Mancuso <jmancuso@nvidia.com>
parent d8b7d394
...@@ -207,12 +207,16 @@ func injectMpDistributedLaunchFlags(container *corev1.Container, role Role, serv ...@@ -207,12 +207,16 @@ func injectMpDistributedLaunchFlags(container *corev1.Container, role Role, serv
func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) { func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) {
switch role { switch role {
case RoleLeader: case RoleLeader:
fullCommand := strings.Join(container.Command, " ") quotedCmd := make([]string, len(container.Command))
originalArgs := strings.Join(container.Args, " ") for i, tok := range container.Command {
// Use Ray executor for multi-node vLLM deployments. quotedCmd[i] = shellQuoteForBashC(tok)
// vLLM will create a placement group spanning all Ray nodes and spawn workers automatically. }
// DO NOT pass --nnodes or --node-rank - these are only for mp backend. fullCommand := strings.Join(quotedCmd, " ")
// The Ray executor handles multi-node distribution via placement groups. quotedArgs := make([]string, len(container.Args))
for i, arg := range container.Args {
quotedArgs[i] = shellQuoteForBashC(arg)
}
originalArgs := strings.Join(quotedArgs, " ")
vllmMultinodeFlags := "--distributed-executor-backend ray" vllmMultinodeFlags := "--distributed-executor-backend ray"
container.Args = []string{fmt.Sprintf("ray start --head --port=%s && %s %s %s", VLLMPort, fullCommand, originalArgs, vllmMultinodeFlags)} container.Args = []string{fmt.Sprintf("ray start --head --port=%s && %s %s %s", VLLMPort, fullCommand, originalArgs, vllmMultinodeFlags)}
case RoleWorker: case RoleWorker:
......
...@@ -48,6 +48,27 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -48,6 +48,27 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm --model test %s 8 --distributed-executor-backend ray", VLLMPort, tensorParallelSizeFlag)}, expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm --model test %s 8 --distributed-executor-backend ray", VLLMPort, tensorParallelSizeFlag)},
expectProbesRemoved: true, expectProbesRemoved: true,
}, },
{
name: "multinode leader uses ray with JSON args (no annotations = legacy)",
numberOfNodes: 3,
role: RoleLeader,
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{
Command: []string{"python3", "-m", "dynamo.vllm"},
Args: []string{
"--model", "test", tensorParallelSizeFlag, "8",
"--kv-transfer-config",
`{"kv_connector": "NixlConnector", "kv_role": "kv_both"}`,
},
},
gpuCount: 4,
expectedArgs: []string{fmt.Sprintf(
`ray start --head --port=%s && python3 -m dynamo.vllm --model test %s 8 --kv-transfer-config "{\"kv_connector\": \"NixlConnector\", \"kv_role\": \"kv_both\"}" --distributed-executor-backend ray`,
VLLMPort, tensorParallelSizeFlag,
)},
expectProbesRemoved: true,
},
{ {
name: "multinode worker uses ray (no annotations = legacy)", name: "multinode worker uses ray (no annotations = legacy)",
numberOfNodes: 3, numberOfNodes: 3,
......
...@@ -48,9 +48,13 @@ func injectFlagsIntoContainerCommand(container *corev1.Container, flags string, ...@@ -48,9 +48,13 @@ func injectFlagsIntoContainerCommand(container *corev1.Container, flags string,
// Direct python command case // Direct python command case
if needsShell { if needsShell {
// Transform to shell wrapper for env var interpretation. // Transform to shell wrapper for env var interpretation.
// Quote each original arg individually so JSON and other special // Quote each token individually so paths with spaces or special
// characters survive shell interpretation. // characters survive shell interpretation.
fullCommand := strings.Join(container.Command, " ") quotedCmd := make([]string, len(container.Command))
for i, tok := range container.Command {
quotedCmd[i] = shellQuoteForBashC(tok)
}
fullCommand := strings.Join(quotedCmd, " ")
quotedArgs := make([]string, len(container.Args)) quotedArgs := make([]string, len(container.Args))
for i, arg := range container.Args { for i, arg := range container.Args {
quotedArgs[i] = shellQuoteForBashC(arg) quotedArgs[i] = shellQuoteForBashC(arg)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment