Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d152596a
Unverified
Commit
d152596a
authored
Feb 27, 2026
by
MatejKosec
Committed by
GitHub
Feb 28, 2026
Browse files
fix(operator): fix TRT-LLM worker SSH crash in non-root containers (#6694)
Signed-off-by:
Matej Kosec
<
mkosec@nvidia.com
>
parent
c68052ff
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
4 deletions
+14
-4
deploy/operator/internal/dynamo/backend_trtllm.go
deploy/operator/internal/dynamo/backend_trtllm.go
+11
-1
deploy/operator/internal/dynamo/backend_trtllm_test.go
deploy/operator/internal/dynamo/backend_trtllm_test.go
+3
-3
No files found.
deploy/operator/internal/dynamo/backend_trtllm.go
View file @
d152596a
...
...
@@ -17,6 +17,10 @@ type TRTLLMBackend struct {
MpiRunSecretName
string
}
// UpdateContainer configures the container for TRT-LLM multinode deployments.
// For single-node deployments it is a no-op. For multinode, it mounts the SSH
// keypair secret and injects the appropriate SSH setup and launch commands for
// leader (mpirun) and worker (sshd) roles.
func
(
b
*
TRTLLMBackend
)
UpdateContainer
(
container
*
corev1
.
Container
,
numberOfNodes
int32
,
role
Role
,
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
// Check for volumeMounts with useAsCompilationCache=true
for
_
,
volumeMount
:=
range
component
.
VolumeMounts
{
...
...
@@ -74,6 +78,8 @@ func (b *TRTLLMBackend) UpdateContainer(container *corev1.Container, numberOfNod
}
}
// UpdatePodSpec injects the SSH keypair volume into the pod spec for TRT-LLM
// multinode deployments so that leader and worker containers can mount it.
func
(
b
*
TRTLLMBackend
)
UpdatePodSpec
(
podSpec
*
corev1
.
PodSpec
,
numberOfNodes
int32
,
role
Role
,
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
// Add SSH keypair volume for TRT-LLM multinode deployments
if
numberOfNodes
>
1
{
...
...
@@ -187,7 +193,7 @@ func (b *TRTLLMBackend) setupWorkerContainer(container *corev1.Container) {
// Setup SSH for worker nodes
// Use $HOME instead of ~ for the same reasons as setupLeaderContainer (see comment above).
sshSetupCommands
:=
[]
string
{
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run
/run/sshd
"
,
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run"
,
"ls -la /ssh-pk/"
,
// Debug: list files in ssh-pk directory
"cp /ssh-pk/private.key $HOME/.ssh/id_rsa"
,
"cp /ssh-pk/private.key.pub $HOME/.ssh/id_rsa.pub"
,
...
...
@@ -206,6 +212,10 @@ func (b *TRTLLMBackend) setupWorkerContainer(container *corev1.Container) {
// relative paths from the connecting user's /etc/passwd home (-> /root/).
// StrictModes disabled because /home/dynamo may be owned by a non-root UID
// while sshd runs as root, causing permission check failures.
// Note: /run/sshd (the privilege separation directory) is not needed here
// because sshd started as a non-root user skips the privsep directory check
// entirely — privsep requires forking a privileged monitor process, which is
// only possible when sshd starts as UID 0.
fmt
.
Sprintf
(
"printf 'Port %d
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_rsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ecdsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ed25519_key
\\
nPidFile '$HOME'/.ssh/run/sshd.pid
\\
nStrictModes no
\\
nPermitRootLogin yes
\\
nPasswordAuthentication no
\\
nPubkeyAuthentication yes
\\
nAuthorizedKeysFile '$HOME'/.ssh/authorized_keys
\\
n' > $HOME/.ssh/sshd_config"
,
commonconsts
.
MpiRunSshPort
),
"/usr/sbin/sshd -D -f $HOME/.ssh/sshd_config"
,
}
...
...
deploy/operator/internal/dynamo/backend_trtllm_test.go
View file @
d152596a
...
...
@@ -180,7 +180,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
{
Name
:
mpiRunSecretName
,
MountPath
:
"/ssh-pk"
,
ReadOnly
:
true
},
},
expectedCommand
:
[]
string
{
"/bin/sh"
,
"-c"
},
expectedArgs
:
[]
string
{
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run
/run/sshd
&& ls -la /ssh-pk/ && cp /ssh-pk/private.key $HOME/.ssh/id_rsa && cp /ssh-pk/private.key.pub $HOME/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub $HOME/.ssh/authorized_keys && chmod 600 $HOME/.ssh/id_rsa $HOME/.ssh/authorized_keys && chmod 644 $HOME/.ssh/id_rsa.pub && printf 'Host *
\\
nIdentityFile '$HOME'/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > $HOME/.ssh/config && ssh-keygen -t rsa -f $HOME/.ssh/host_keys/ssh_host_rsa_key -N '' && ssh-keygen -t ecdsa -f $HOME/.ssh/host_keys/ssh_host_ecdsa_key -N '' && ssh-keygen -t ed25519 -f $HOME/.ssh/host_keys/ssh_host_ed25519_key -N '' && printf 'Port 2222
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_rsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ecdsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ed25519_key
\\
nPidFile '$HOME'/.ssh/run/sshd.pid
\\
nStrictModes no
\\
nPermitRootLogin yes
\\
nPasswordAuthentication no
\\
nPubkeyAuthentication yes
\\
nAuthorizedKeysFile '$HOME'/.ssh/authorized_keys
\\
n' > $HOME/.ssh/sshd_config && /usr/sbin/sshd -D -f $HOME/.ssh/sshd_config"
},
expectedArgs
:
[]
string
{
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run && ls -la /ssh-pk/ && cp /ssh-pk/private.key $HOME/.ssh/id_rsa && cp /ssh-pk/private.key.pub $HOME/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub $HOME/.ssh/authorized_keys && chmod 600 $HOME/.ssh/id_rsa $HOME/.ssh/authorized_keys && chmod 644 $HOME/.ssh/id_rsa.pub && printf 'Host *
\\
nIdentityFile '$HOME'/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > $HOME/.ssh/config && ssh-keygen -t rsa -f $HOME/.ssh/host_keys/ssh_host_rsa_key -N '' && ssh-keygen -t ecdsa -f $HOME/.ssh/host_keys/ssh_host_ecdsa_key -N '' && ssh-keygen -t ed25519 -f $HOME/.ssh/host_keys/ssh_host_ed25519_key -N '' && printf 'Port 2222
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_rsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ecdsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ed25519_key
\\
nPidFile '$HOME'/.ssh/run/sshd.pid
\\
nStrictModes no
\\
nPermitRootLogin yes
\\
nPasswordAuthentication no
\\
nPubkeyAuthentication yes
\\
nAuthorizedKeysFile '$HOME'/.ssh/authorized_keys
\\
n' > $HOME/.ssh/sshd_config && /usr/sbin/sshd -D -f $HOME/.ssh/sshd_config"
},
expectedEnv
:
[]
corev1
.
EnvVar
{
{
Name
:
"OMPI_MCA_orte_keep_fqdn_hostnames"
,
Value
:
"1"
},
},
...
...
@@ -807,13 +807,13 @@ func TestTRTLLMBackend_setupWorkerContainer(t *testing.T) {
name
:
"Worker setup with initial args"
,
initialArgs
:
[]
string
{
"some"
,
"args"
},
initialCommand
:
[]
string
{},
expected
:
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run
/run/sshd
&& ls -la /ssh-pk/ && cp /ssh-pk/private.key $HOME/.ssh/id_rsa && cp /ssh-pk/private.key.pub $HOME/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub $HOME/.ssh/authorized_keys && chmod 600 $HOME/.ssh/id_rsa $HOME/.ssh/authorized_keys && chmod 644 $HOME/.ssh/id_rsa.pub && printf 'Host *
\\
nIdentityFile '$HOME'/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > $HOME/.ssh/config && ssh-keygen -t rsa -f $HOME/.ssh/host_keys/ssh_host_rsa_key -N '' && ssh-keygen -t ecdsa -f $HOME/.ssh/host_keys/ssh_host_ecdsa_key -N '' && ssh-keygen -t ed25519 -f $HOME/.ssh/host_keys/ssh_host_ed25519_key -N '' && printf 'Port 2222
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_rsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ecdsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ed25519_key
\\
nPidFile '$HOME'/.ssh/run/sshd.pid
\\
nStrictModes no
\\
nPermitRootLogin yes
\\
nPasswordAuthentication no
\\
nPubkeyAuthentication yes
\\
nAuthorizedKeysFile '$HOME'/.ssh/authorized_keys
\\
n' > $HOME/.ssh/sshd_config && /usr/sbin/sshd -D -f $HOME/.ssh/sshd_config"
,
expected
:
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run && ls -la /ssh-pk/ && cp /ssh-pk/private.key $HOME/.ssh/id_rsa && cp /ssh-pk/private.key.pub $HOME/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub $HOME/.ssh/authorized_keys && chmod 600 $HOME/.ssh/id_rsa $HOME/.ssh/authorized_keys && chmod 644 $HOME/.ssh/id_rsa.pub && printf 'Host *
\\
nIdentityFile '$HOME'/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > $HOME/.ssh/config && ssh-keygen -t rsa -f $HOME/.ssh/host_keys/ssh_host_rsa_key -N '' && ssh-keygen -t ecdsa -f $HOME/.ssh/host_keys/ssh_host_ecdsa_key -N '' && ssh-keygen -t ed25519 -f $HOME/.ssh/host_keys/ssh_host_ed25519_key -N '' && printf 'Port 2222
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_rsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ecdsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ed25519_key
\\
nPidFile '$HOME'/.ssh/run/sshd.pid
\\
nStrictModes no
\\
nPermitRootLogin yes
\\
nPasswordAuthentication no
\\
nPubkeyAuthentication yes
\\
nAuthorizedKeysFile '$HOME'/.ssh/authorized_keys
\\
n' > $HOME/.ssh/sshd_config && /usr/sbin/sshd -D -f $HOME/.ssh/sshd_config"
,
},
{
name
:
"Worker setup with initial command"
,
initialArgs
:
[]
string
{},
initialCommand
:
[]
string
{
"original"
,
"command"
},
expected
:
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run
/run/sshd
&& ls -la /ssh-pk/ && cp /ssh-pk/private.key $HOME/.ssh/id_rsa && cp /ssh-pk/private.key.pub $HOME/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub $HOME/.ssh/authorized_keys && chmod 600 $HOME/.ssh/id_rsa $HOME/.ssh/authorized_keys && chmod 644 $HOME/.ssh/id_rsa.pub && printf 'Host *
\\
nIdentityFile '$HOME'/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > $HOME/.ssh/config && ssh-keygen -t rsa -f $HOME/.ssh/host_keys/ssh_host_rsa_key -N '' && ssh-keygen -t ecdsa -f $HOME/.ssh/host_keys/ssh_host_ecdsa_key -N '' && ssh-keygen -t ed25519 -f $HOME/.ssh/host_keys/ssh_host_ed25519_key -N '' && printf 'Port 2222
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_rsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ecdsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ed25519_key
\\
nPidFile '$HOME'/.ssh/run/sshd.pid
\\
nStrictModes no
\\
nPermitRootLogin yes
\\
nPasswordAuthentication no
\\
nPubkeyAuthentication yes
\\
nAuthorizedKeysFile '$HOME'/.ssh/authorized_keys
\\
n' > $HOME/.ssh/sshd_config && /usr/sbin/sshd -D -f $HOME/.ssh/sshd_config"
,
expected
:
"mkdir -p $HOME/.ssh $HOME/.ssh/host_keys $HOME/.ssh/run && ls -la /ssh-pk/ && cp /ssh-pk/private.key $HOME/.ssh/id_rsa && cp /ssh-pk/private.key.pub $HOME/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub $HOME/.ssh/authorized_keys && chmod 600 $HOME/.ssh/id_rsa $HOME/.ssh/authorized_keys && chmod 644 $HOME/.ssh/id_rsa.pub && printf 'Host *
\\
nIdentityFile '$HOME'/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > $HOME/.ssh/config && ssh-keygen -t rsa -f $HOME/.ssh/host_keys/ssh_host_rsa_key -N '' && ssh-keygen -t ecdsa -f $HOME/.ssh/host_keys/ssh_host_ecdsa_key -N '' && ssh-keygen -t ed25519 -f $HOME/.ssh/host_keys/ssh_host_ed25519_key -N '' && printf 'Port 2222
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_rsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ecdsa_key
\\
nHostKey '$HOME'/.ssh/host_keys/ssh_host_ed25519_key
\\
nPidFile '$HOME'/.ssh/run/sshd.pid
\\
nStrictModes no
\\
nPermitRootLogin yes
\\
nPasswordAuthentication no
\\
nPubkeyAuthentication yes
\\
nAuthorizedKeysFile '$HOME'/.ssh/authorized_keys
\\
n' > $HOME/.ssh/sshd_config && /usr/sbin/sshd -D -f $HOME/.ssh/sshd_config"
,
},
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment