Unverified Commit cad41b1c authored by cmdy's avatar cmdy Committed by GitHub
Browse files

fix: fix sglang multinode deployment with LWS (#7485)


Signed-off-by: default avatarcmdy <zhang_lin66@foxmail.com>
parent 3f6c6249
......@@ -3,6 +3,7 @@ package dynamo
import (
"fmt"
"regexp"
"strings"
"github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
......@@ -68,7 +69,7 @@ func (b *SGLangBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int
// getMultinodeFlags returns the multinode flags and whether shell interpretation is needed
func (b *SGLangBackend) getMultinodeFlags(numberOfNodes int32, role Role, serviceName string, multinodeDeployer MultinodeDeployer) (string, bool) {
distInitAddr := fmt.Sprintf("%s:%s", multinodeDeployer.GetLeaderHostname(serviceName), SglangPort)
leaderHostname := multinodeDeployer.GetLeaderHostname(serviceName)
var nodeRank string
var needsShell bool
......@@ -76,10 +77,28 @@ func (b *SGLangBackend) getMultinodeFlags(numberOfNodes int32, role Role, servic
if role == RoleLeader {
nodeRank = "0"
needsShell = false
leaderHostname = convertIfShellVar(leaderHostname)
} else {
nodeRank, needsShell = multinodeDeployer.GetNodeRank()
}
distInitAddr := fmt.Sprintf("%s:%s", leaderHostname, SglangPort)
flags := fmt.Sprintf("--dist-init-addr %s --nnodes %d --node-rank %s", distInitAddr, numberOfNodes, nodeRank)
return flags, needsShell
}
// Match a string representing a shell variable, such as $ABC
var shellVarRe = regexp.MustCompile(`^\$([A-Za-z_][A-Za-z0-9_]*)$`)
// convertIfShellVar convert shell variable $ABC to $(ABC)
func convertIfShellVar(s string) string {
if strings.HasPrefix(s, "$(") && strings.HasSuffix(s, ")") {
return s
}
if match := shellVarRe.FindStringSubmatch(s); len(match) > 1 {
return "$(" + match[1] + ")"
}
return s
}
......@@ -273,7 +273,7 @@ func TestSGLangBackend_ShellCommandInjection(t *testing.T) {
multinodeDeployer: &LWSMultinodeDeployer{},
initialCommand: []string{"sh", "-c"},
initialArgs: []string{"python -m dynamo.sglang"},
expectedArgs: []string{"python -m dynamo.sglang --dist-init-addr $LWS_LEADER_ADDRESS:29500 --nnodes 2 --node-rank 0"},
expectedArgs: []string{"python -m dynamo.sglang --dist-init-addr $(LWS_LEADER_ADDRESS):29500 --nnodes 2 --node-rank 0"},
description: "LWS shell commands should use LWS variables",
},
{
......@@ -611,7 +611,6 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
t.Errorf("Expected no environment variable changes, but env count changed from %d to %d", originalEnvCount, len(container.Env))
}
}
})
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment