Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
317b9614
Unverified
Commit
317b9614
authored
Apr 07, 2026
by
Julien Mancuso
Committed by
GitHub
Apr 07, 2026
Browse files
fix(operator): use ConfigMap for vLLM multinode wait-for-leader script (#7954)
parent
4cdc49c2
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
241 additions
and
66 deletions
+241
-66
deploy/operator/internal/controller/dynamographdeployment_controller.go
...r/internal/controller/dynamographdeployment_controller.go
+22
-0
deploy/operator/internal/discovery/resource.go
deploy/operator/internal/discovery/resource.go
+1
-1
deploy/operator/internal/dynamo/backend_vllm.go
deploy/operator/internal/dynamo/backend_vllm.go
+148
-26
deploy/operator/internal/dynamo/backend_vllm_test.go
deploy/operator/internal/dynamo/backend_vllm_test.go
+67
-36
deploy/operator/internal/dynamo/graph.go
deploy/operator/internal/dynamo/graph.go
+3
-3
No files found.
deploy/operator/internal/controller/dynamographdeployment_controller.go
View file @
317b9614
...
...
@@ -326,6 +326,13 @@ func (r *DynamoGraphDeploymentReconciler) reconcileResources(ctx context.Context
return
ReconcileResult
{},
fmt
.
Errorf
(
"failed to reconcile EPP resources: %w"
,
err
)
}
// Reconcile the wait-for-leader ConfigMap for multinode mp deployments
err
=
r
.
reconcileWaitLeaderConfigMap
(
ctx
,
dynamoDeployment
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"Failed to reconcile wait-leader ConfigMap"
)
return
ReconcileResult
{},
fmt
.
Errorf
(
"failed to reconcile wait-leader ConfigMap: %w"
,
err
)
}
// Determine if any service is multinode
hasMultinode
:=
dynamoDeployment
.
HasAnyMultinodeService
()
...
...
@@ -1582,6 +1589,21 @@ func (r *DynamoGraphDeploymentReconciler) reconcileEPPResources(ctx context.Cont
return
nil
}
// reconcileWaitLeaderConfigMap ensures the wait-for-leader Python script
// ConfigMap exists for multinode DGDs. The ConfigMap is only mounted by
// vLLM mp worker pods (via UpdatePodSpec); for other backends it is inert.
func
(
r
*
DynamoGraphDeploymentReconciler
)
reconcileWaitLeaderConfigMap
(
ctx
context
.
Context
,
dgd
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
error
{
if
!
dgd
.
HasAnyMultinodeService
()
{
return
nil
}
cm
:=
dynamo
.
GenerateWaitLeaderConfigMap
(
dgd
.
Name
,
dgd
.
Namespace
)
_
,
_
,
err
:=
commoncontroller
.
SyncResource
(
ctx
,
r
,
dgd
,
func
(
ctx
context
.
Context
)
(
*
corev1
.
ConfigMap
,
bool
,
error
)
{
return
cm
,
false
,
nil
})
return
err
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
FinalizeResource
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
error
{
// for now doing nothing
return
nil
...
...
deploy/operator/internal/discovery/resource.go
View file @
317b9614
...
...
@@ -55,7 +55,7 @@ func GetK8sDiscoveryRole(dgdName string, namespace string) *rbacv1.Role {
Rules
:
[]
rbacv1
.
PolicyRule
{
{
APIGroups
:
[]
string
{
apiGroupCore
},
Resources
:
[]
string
{
"endpoints"
},
Resources
:
[]
string
{
"endpoints"
,
"pods"
},
Verbs
:
[]
string
{
"get"
,
"list"
,
"watch"
},
},
{
...
...
deploy/operator/internal/dynamo/backend_vllm.go
View file @
317b9614
...
...
@@ -2,6 +2,7 @@ package dynamo
import
(
"fmt"
"regexp"
"strconv"
"strings"
...
...
@@ -9,6 +10,7 @@ import (
commonconsts
"github.com/ai-dynamo/dynamo/deploy/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/operator/internal/featuregate"
corev1
"k8s.io/api/core/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/log"
)
...
...
@@ -20,7 +22,9 @@ const (
dataParallelSizeFlag
=
"--data-parallel-size"
)
type
VLLMBackend
struct
{}
type
VLLMBackend
struct
{
ParentGraphDeploymentName
string
}
func
(
b
*
VLLMBackend
)
UpdateContainer
(
container
*
corev1
.
Container
,
numberOfNodes
int32
,
role
Role
,
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
isMultinode
:=
numberOfNodes
>
1
...
...
@@ -78,44 +82,162 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes
}
}
func
(
b
*
VLLMBackend
)
UpdatePodSpec
(
podSpec
*
corev1
.
PodSpec
,
numberOfNodes
int32
,
role
Role
,
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
if
numberOfNodes
<=
1
||
role
!=
RoleWorker
||
!
shouldUseMpBackend
(
component
.
Annotations
)
{
return
}
const
(
waitLeaderConfigMapSuffix
=
"wait-leader-script"
waitLeaderScriptKey
=
"wait-for-leader.py"
waitLeaderVolumeName
=
"wait-leader-script"
waitLeaderMountPath
=
"/scripts"
)
if
len
(
podSpec
.
Containers
)
==
0
{
return
}
// WaitLeaderScript is the Python script that verifies leader pod health via
// the K8s API before attempting a TCP connection. It reads LEADER_HOST and
// LEADER_PORT from environment variables so the script content is generic.
const
WaitLeaderScript
=
`import socket, time, json, ssl, urllib.request, os
leaderHostname
:=
multinodeDeployer
.
GetLeaderHostname
(
serviceName
)
mainImage
:=
podSpec
.
Containers
[
0
]
.
Image
SA = "/var/run/secrets/kubernetes.io/serviceaccount"
host = os.environ["LEADER_HOST"]
port = int(os.environ["LEADER_PORT"])
def _k8s_ctx():
return ssl.create_default_context(cafile=f"{SA}/ca.crt")
def _k8s_headers():
token = open(f"{SA}/token").read()
return {"Authorization": f"Bearer {token}"}
def _k8s_api():
ns = open(f"{SA}/namespace").read()
return f"https://kubernetes.default.svc/api/v1/namespaces/{ns}/pods"
def leader_pod_is_healthy():
try:
ip = socket.gethostbyname(host)
except socket.gaierror:
return False, "DNS resolution failed", None, None
try:
req = urllib.request.Request(
f"{_k8s_api()}?fieldSelector=status.podIP={ip}",
headers=_k8s_headers(),
)
resp = json.loads(urllib.request.urlopen(req, context=_k8s_ctx(), timeout=5).read())
pods = resp.get("items", [])
if not pods:
return False, f"no pod found with IP {ip}", None, ip
pod = pods[0]
name = pod["metadata"].get("name", "unknown")
uid = pod["metadata"].get("uid", "unknown")
phase = pod.get("status", {}).get("phase")
deletion_ts = pod["metadata"].get("deletionTimestamp")
info = f"ip={ip} pod={name} uid={uid} phase={phase} deletionTimestamp={deletion_ts}"
if deletion_ts:
return False, f"pod {name} is terminating", info, ip
if phase != "Running":
return False, f"pod {name} phase is {phase}", info, ip
return True, "", info, ip
except Exception as e:
# Fall back to TCP-only when the API is unavailable (e.g. 403 no RBAC)
return True, f"K8s API unavailable ({e}), falling back to TCP", f"ip={ip}", ip
waitScript
:=
fmt
.
Sprintf
(
`import socket, time
host, port = "%s", %s
print(f"Waiting for leader master port at {host}:{port}...", flush=True)
time.sleep(5)
start = time.monotonic()
last_status = start
last_err = ""
while True:
try:
s = socket.create_connection((host, port), timeout=2)
s.close()
elapsed = time.monotonic() - start
print(f"Leader master port ready (waited {elapsed:.1f}s)", flush=True)
break
except Exception as e:
last_err = f"{type(e).__name__}: {e}"
healthy, reason, pod_info, leader_ip = leader_pod_is_healthy()
if healthy:
try:
s = socket.create_connection((leader_ip, port), timeout=2)
s.close()
elapsed = time.monotonic() - start
print(f"Leader master port ready (waited {elapsed:.1f}s) [{pod_info}]", flush=True)
break
except Exception as e:
last_err = f"tcp: {type(e).__name__}: {e} [{pod_info}]"
else:
last_err = f"{reason} [{pod_info}]" if pod_info else reason
now = time.monotonic()
if now - last_status >= 30:
print(f"Still waiting for {host}:{port}... ({now - start:.0f}s elapsed, last
error
: {last_err})", flush=True)
print(f"Still waiting for {host}:{port}... ({now - start:.0f}s elapsed, last: {last_err})", flush=True)
last_status = now
time.sleep(
2
)
`
,
leaderHostname
,
commonconsts
.
VLLMMpMasterPort
)
time.sleep(
5
)
`
// k8sVarPattern matches Kubernetes $(VAR) env-var expansion syntax.
var
k8sVarPattern
=
regexp
.
MustCompile
(
`\$\((\w+)\)`
)
// k8sToShellVarSyntax converts Kubernetes $(VAR) references to shell ${VAR}
// so that variables can be expanded by a shell at runtime. Plain $VAR
// references (e.g. from LWS) are already valid shell syntax and left as-is.
func
k8sToShellVarSyntax
(
s
string
)
string
{
return
k8sVarPattern
.
ReplaceAllString
(
s
,
`${$1}`
)
}
// GetWaitLeaderConfigMapName returns the ConfigMap name for a given DGD.
func
GetWaitLeaderConfigMapName
(
dgdName
string
)
string
{
return
fmt
.
Sprintf
(
"%s-%s"
,
dgdName
,
waitLeaderConfigMapSuffix
)
}
// GenerateWaitLeaderConfigMap creates a ConfigMap containing the wait-for-leader
// Python script. One ConfigMap is created per DGD and owned by the DGD.
func
GenerateWaitLeaderConfigMap
(
dgdName
,
namespace
string
)
*
corev1
.
ConfigMap
{
return
&
corev1
.
ConfigMap
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
GetWaitLeaderConfigMapName
(
dgdName
),
Namespace
:
namespace
,
Labels
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoGraphDeploymentName
:
dgdName
,
},
},
Data
:
map
[
string
]
string
{
waitLeaderScriptKey
:
WaitLeaderScript
,
},
}
}
func
(
b
*
VLLMBackend
)
UpdatePodSpec
(
podSpec
*
corev1
.
PodSpec
,
numberOfNodes
int32
,
role
Role
,
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
if
numberOfNodes
<=
1
||
role
!=
RoleWorker
||
!
shouldUseMpBackend
(
component
.
Annotations
)
{
return
}
if
len
(
podSpec
.
Containers
)
==
0
||
b
.
ParentGraphDeploymentName
==
""
{
return
}
leaderHostname
:=
multinodeDeployer
.
GetLeaderHostname
(
serviceName
)
mainImage
:=
podSpec
.
Containers
[
0
]
.
Image
cmName
:=
GetWaitLeaderConfigMapName
(
b
.
ParentGraphDeploymentName
)
podSpec
.
Volumes
=
append
(
podSpec
.
Volumes
,
corev1
.
Volume
{
Name
:
waitLeaderVolumeName
,
VolumeSource
:
corev1
.
VolumeSource
{
ConfigMap
:
&
corev1
.
ConfigMapVolumeSource
{
LocalObjectReference
:
corev1
.
LocalObjectReference
{
Name
:
cmName
,
},
},
},
})
// Use sh -c so the shell expands variable references at runtime.
// Grove/LWS env vars are appended to init containers AFTER our env
// vars, so Kubernetes $(VAR) expansion (which is order-dependent)
// cannot resolve them. The shell sees all env vars regardless of
// definition order.
shellHostname
:=
k8sToShellVarSyntax
(
leaderHostname
)
initContainer
:=
corev1
.
Container
{
Name
:
"wait-for-leader-mp"
,
Image
:
mainImage
,
Command
:
[]
string
{
"python3"
,
"-c"
,
waitScript
},
Name
:
"wait-for-leader-mp"
,
Image
:
mainImage
,
Command
:
[]
string
{
"sh"
,
"-c"
,
fmt
.
Sprintf
(
`export LEADER_HOST="%s" LEADER_PORT="%s" && exec python3 %s/%s`
,
shellHostname
,
commonconsts
.
VLLMMpMasterPort
,
waitLeaderMountPath
,
waitLeaderScriptKey
)},
VolumeMounts
:
[]
corev1
.
VolumeMount
{
{
Name
:
waitLeaderVolumeName
,
MountPath
:
waitLeaderMountPath
,
ReadOnly
:
true
,
},
},
}
podSpec
.
InitContainers
=
append
(
podSpec
.
InitContainers
,
initContainer
)
...
...
deploy/operator/internal/dynamo/backend_vllm_test.go
View file @
317b9614
...
...
@@ -560,20 +560,18 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
}
func
TestVLLMBackend_UpdatePodSpec
(
t
*
testing
.
T
)
{
backend
:=
&
VLLMBackend
{}
backend
:=
&
VLLMBackend
{
ParentGraphDeploymentName
:
"test-dgd"
}
tests
:=
[]
struct
{
name
string
numberOfNodes
int32
role
Role
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
multinodeDeployer
MultinodeDeployer
initialPodSpec
*
corev1
.
PodSpec
expectInitContainer
bool
expectedInitName
string
expectedInitImage
string
expectedInitCommandLen
int
expectWaitScriptContent
string
name
string
numberOfNodes
int32
role
Role
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
multinodeDeployer
MultinodeDeployer
initialPodSpec
*
corev1
.
PodSpec
expectInitContainer
bool
expectedInitImage
string
expectedLeaderHost
string
}{
{
name
:
"mp worker with Grove deployer injects init container"
,
...
...
@@ -590,11 +588,9 @@ func TestVLLMBackend_UpdatePodSpec(t *testing.T) {
{
Name
:
"main"
,
Image
:
"vllm:latest"
},
},
},
expectInitContainer
:
true
,
expectedInitName
:
"wait-for-leader-mp"
,
expectedInitImage
:
"vllm:latest"
,
expectedInitCommandLen
:
3
,
expectWaitScriptContent
:
"$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE)"
,
expectInitContainer
:
true
,
expectedInitImage
:
"vllm:latest"
,
expectedLeaderHost
:
"${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}"
,
},
{
name
:
"mp worker with LWS deployer injects init container"
,
...
...
@@ -611,11 +607,9 @@ func TestVLLMBackend_UpdatePodSpec(t *testing.T) {
{
Name
:
"main"
,
Image
:
"vllm:v2"
},
},
},
expectInitContainer
:
true
,
expectedInitName
:
"wait-for-leader-mp"
,
expectedInitImage
:
"vllm:v2"
,
expectedInitCommandLen
:
3
,
expectWaitScriptContent
:
"$LWS_LEADER_ADDRESS"
,
expectInitContainer
:
true
,
expectedInitImage
:
"vllm:v2"
,
expectedLeaderHost
:
"$LWS_LEADER_ADDRESS"
,
},
{
name
:
"mp leader does not inject init container"
,
...
...
@@ -682,11 +676,9 @@ func TestVLLMBackend_UpdatePodSpec(t *testing.T) {
{
Name
:
"main"
,
Image
:
"vllm:latest"
},
},
},
expectInitContainer
:
true
,
expectedInitName
:
"wait-for-leader-mp"
,
expectedInitImage
:
"vllm:latest"
,
expectedInitCommandLen
:
3
,
expectWaitScriptContent
:
"$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE)"
,
expectInitContainer
:
true
,
expectedInitImage
:
"vllm:latest"
,
expectedLeaderHost
:
"${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}"
,
},
}
...
...
@@ -695,27 +687,66 @@ func TestVLLMBackend_UpdatePodSpec(t *testing.T) {
g
:=
gomega
.
NewGomegaWithT
(
t
)
initialInitCount
:=
len
(
tt
.
initialPodSpec
.
InitContainers
)
initialVolCount
:=
len
(
tt
.
initialPodSpec
.
Volumes
)
backend
.
UpdatePodSpec
(
tt
.
initialPodSpec
,
tt
.
numberOfNodes
,
tt
.
role
,
tt
.
component
,
"test-service"
,
tt
.
multinodeDeployer
)
if
tt
.
expectInitContainer
{
g
.
Expect
(
len
(
tt
.
initialPodSpec
.
InitContainers
))
.
To
(
gomega
.
Equal
(
initialInitCount
+
1
))
g
.
Expect
(
tt
.
initialPodSpec
.
InitContainers
)
.
To
(
gomega
.
HaveLen
(
initialInitCount
+
1
))
g
.
Expect
(
tt
.
initialPodSpec
.
Volumes
)
.
To
(
gomega
.
HaveLen
(
initialVolCount
+
1
))
injected
:=
tt
.
initialPodSpec
.
InitContainers
[
len
(
tt
.
initialPodSpec
.
InitContainers
)
-
1
]
g
.
Expect
(
injected
.
Name
)
.
To
(
gomega
.
Equal
(
tt
.
expectedInitName
))
g
.
Expect
(
injected
.
Name
)
.
To
(
gomega
.
Equal
(
"wait-for-leader-mp"
))
g
.
Expect
(
injected
.
Image
)
.
To
(
gomega
.
Equal
(
tt
.
expectedInitImage
))
g
.
Expect
(
len
(
injected
.
Command
))
.
To
(
gomega
.
Equal
(
tt
.
expectedInitCommandLen
))
g
.
Expect
(
injected
.
Command
[
0
])
.
To
(
gomega
.
Equal
(
"python3"
))
g
.
Expect
(
injected
.
Command
[
1
])
.
To
(
gomega
.
Equal
(
"-c"
))
g
.
Expect
(
injected
.
Command
[
2
])
.
To
(
gomega
.
ContainSubstring
(
tt
.
expectWaitScriptContent
))
g
.
Expect
(
injected
.
Command
[
2
])
.
To
(
gomega
.
ContainSubstring
(
"socket.create_connection"
))
g
.
Expect
(
injected
.
Command
[
2
])
.
To
(
gomega
.
ContainSubstring
(
commonconsts
.
VLLMMpMasterPort
))
expectedCmd
:=
fmt
.
Sprintf
(
`export LEADER_HOST="%s" LEADER_PORT="%s" && exec python3 /scripts/wait-for-leader.py`
,
tt
.
expectedLeaderHost
,
commonconsts
.
VLLMMpMasterPort
)
g
.
Expect
(
injected
.
Command
)
.
To
(
gomega
.
Equal
([]
string
{
"sh"
,
"-c"
,
expectedCmd
}))
g
.
Expect
(
injected
.
Env
)
.
To
(
gomega
.
BeEmpty
())
g
.
Expect
(
injected
.
VolumeMounts
)
.
To
(
gomega
.
HaveLen
(
1
))
g
.
Expect
(
injected
.
VolumeMounts
[
0
]
.
Name
)
.
To
(
gomega
.
Equal
(
"wait-leader-script"
))
g
.
Expect
(
injected
.
VolumeMounts
[
0
]
.
MountPath
)
.
To
(
gomega
.
Equal
(
"/scripts"
))
g
.
Expect
(
injected
.
VolumeMounts
[
0
]
.
ReadOnly
)
.
To
(
gomega
.
BeTrue
())
vol
:=
tt
.
initialPodSpec
.
Volumes
[
len
(
tt
.
initialPodSpec
.
Volumes
)
-
1
]
g
.
Expect
(
vol
.
Name
)
.
To
(
gomega
.
Equal
(
"wait-leader-script"
))
g
.
Expect
(
vol
.
ConfigMap
)
.
ToNot
(
gomega
.
BeNil
())
g
.
Expect
(
vol
.
ConfigMap
.
Name
)
.
To
(
gomega
.
Equal
(
"test-dgd-wait-leader-script"
))
}
else
{
g
.
Expect
(
len
(
tt
.
initialPodSpec
.
InitContainers
))
.
To
(
gomega
.
Equal
(
initialInitCount
))
g
.
Expect
(
tt
.
initialPodSpec
.
InitContainers
)
.
To
(
gomega
.
HaveLen
(
initialInitCount
))
g
.
Expect
(
tt
.
initialPodSpec
.
Volumes
)
.
To
(
gomega
.
HaveLen
(
initialVolCount
))
}
})
}
}
func
TestGenerateWaitLeaderConfigMap
(
t
*
testing
.
T
)
{
g
:=
gomega
.
NewGomegaWithT
(
t
)
cm
:=
GenerateWaitLeaderConfigMap
(
"my-dgd"
,
"my-ns"
)
g
.
Expect
(
cm
.
Name
)
.
To
(
gomega
.
Equal
(
"my-dgd-wait-leader-script"
))
g
.
Expect
(
cm
.
Namespace
)
.
To
(
gomega
.
Equal
(
"my-ns"
))
g
.
Expect
(
cm
.
Labels
)
.
To
(
gomega
.
HaveKeyWithValue
(
commonconsts
.
KubeLabelDynamoGraphDeploymentName
,
"my-dgd"
))
g
.
Expect
(
cm
.
Data
)
.
To
(
gomega
.
HaveKey
(
"wait-for-leader.py"
))
script
:=
cm
.
Data
[
"wait-for-leader.py"
]
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
`os.environ["LEADER_HOST"]`
))
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
`os.environ["LEADER_PORT"]`
))
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
"leader_pod_is_healthy"
))
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
"kubernetes.default.svc"
))
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
"fieldSelector=status.podIP="
))
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
"deletionTimestamp"
))
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
"socket.create_connection"
))
g
.
Expect
(
script
)
.
To
(
gomega
.
ContainSubstring
(
"time.sleep(5)"
))
}
func
TestGetWaitLeaderConfigMapName
(
t
*
testing
.
T
)
{
g
:=
gomega
.
NewGomegaWithT
(
t
)
g
.
Expect
(
GetWaitLeaderConfigMapName
(
"foo"
))
.
To
(
gomega
.
Equal
(
"foo-wait-leader-script"
))
}
func
TestShouldUseMpBackend
(
t
*
testing
.
T
)
{
// Version-based gate behavior is tested in featuregate.TestOperatorOriginFeatureGate_IsEnabled.
// These tests focus on the explicit override logic and its interaction with the feature gate.
...
...
deploy/operator/internal/dynamo/graph.go
View file @
317b9614
...
...
@@ -895,12 +895,12 @@ type MultinodeDeployer interface {
}
// BackendFactory creates backend instances based on the framework type
func
BackendFactory
(
backendFramework
BackendFramework
,
operatorConfig
*
configv1alpha1
.
OperatorConfiguration
)
Backend
{
func
BackendFactory
(
backendFramework
BackendFramework
,
operatorConfig
*
configv1alpha1
.
OperatorConfiguration
,
parentGraphDeploymentName
string
)
Backend
{
switch
backendFramework
{
case
BackendFrameworkSGLang
:
return
&
SGLangBackend
{}
case
BackendFrameworkVLLM
:
return
&
VLLMBackend
{}
return
&
VLLMBackend
{
ParentGraphDeploymentName
:
parentGraphDeploymentName
}
case
BackendFrameworkTRTLLM
:
return
&
TRTLLMBackend
{
MpiRunSecretName
:
operatorConfig
.
MPI
.
SSHSecretName
,
...
...
@@ -1121,7 +1121,7 @@ func GenerateBasePodSpec(
if
multinodeDeployer
==
nil
{
return
nil
,
fmt
.
Errorf
(
"unsupported multinode deployment type: %s"
,
multinodeDeploymentType
)
}
backend
:=
BackendFactory
(
backendFramework
,
operatorConfig
)
backend
:=
BackendFactory
(
backendFramework
,
operatorConfig
,
parentGraphDeploymentName
)
if
backend
==
nil
{
return
nil
,
fmt
.
Errorf
(
"unsupported backend framework: %s"
,
backendFramework
)
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment