Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
45e38d3d
Unverified
Commit
45e38d3d
authored
Aug 26, 2025
by
Michael Gathara
Committed by
GitHub
Aug 26, 2025
Browse files
feat: HF_ENDPOINT addition (#2637)
parent
889d6529
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
8 deletions
+8
-8
deploy/cloud/operator/internal/dynamo/backend_trtllm.go
deploy/cloud/operator/internal/dynamo/backend_trtllm.go
+1
-1
deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
+7
-7
No files found.
deploy/cloud/operator/internal/dynamo/backend_trtllm.go
View file @
45e38d3d
...
@@ -188,7 +188,7 @@ func getGPUsPerNode(resources *common.Resources) int32 {
...
@@ -188,7 +188,7 @@ func getGPUsPerNode(resources *common.Resources) int32 {
// getCommonTRTLLMEnvVars returns a map of common environment variables for TRTLLM deployments
// getCommonTRTLLMEnvVars returns a map of common environment variables for TRTLLM deployments
func
getCommonTRTLLMEnvVars
()
map
[
string
]
bool
{
func
getCommonTRTLLMEnvVars
()
map
[
string
]
bool
{
return
map
[
string
]
bool
{
return
map
[
string
]
bool
{
"CUDA_VISIBLE_DEVICES"
:
true
,
"MODEL_PATH"
:
true
,
"HF_TOKEN"
:
true
,
"HUGGING_FACE_HUB_TOKEN"
:
true
,
"CUDA_VISIBLE_DEVICES"
:
true
,
"MODEL_PATH"
:
true
,
"HF_TOKEN"
:
true
,
"HUGGING_FACE_HUB_TOKEN"
:
true
,
"HF_ENDPOINT"
:
true
,
"TOKENIZERS_PARALLELISM"
:
true
,
"NCCL_DEBUG"
:
true
,
"NCCL_IB_DISABLE"
:
true
,
"NCCL_P2P_DISABLE"
:
true
,
"TOKENIZERS_PARALLELISM"
:
true
,
"NCCL_DEBUG"
:
true
,
"NCCL_IB_DISABLE"
:
true
,
"NCCL_P2P_DISABLE"
:
true
,
"TENSORRT_LLM_CACHE_DIR"
:
true
,
"HF_HOME"
:
true
,
"TRANSFORMERS_CACHE"
:
true
,
"HF_DATASETS_CACHE"
:
true
,
"TENSORRT_LLM_CACHE_DIR"
:
true
,
"HF_HOME"
:
true
,
"TRANSFORMERS_CACHE"
:
true
,
"HF_DATASETS_CACHE"
:
true
,
"PATH"
:
true
,
"LD_LIBRARY_PATH"
:
true
,
"PYTHONPATH"
:
true
,
"HOME"
:
true
,
"USER"
:
true
,
"PATH"
:
true
,
"LD_LIBRARY_PATH"
:
true
,
"PYTHONPATH"
:
true
,
"HOME"
:
true
,
"USER"
:
true
,
...
...
deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
View file @
45e38d3d
...
@@ -60,7 +60,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
...
@@ -60,7 +60,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
{
Name
:
commonconsts
.
MpiRunSshSecretName
,
MountPath
:
"/ssh-pk"
,
ReadOnly
:
true
},
{
Name
:
commonconsts
.
MpiRunSshSecretName
,
MountPath
:
"/ssh-pk"
,
ReadOnly
:
true
},
},
},
expectedCommand
:
[]
string
{
"/bin/sh"
,
"-c"
},
expectedCommand
:
[]
string
{
"/bin/sh"
,
"-c"
},
expectedArgs
:
[]
string
{
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 6 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-1.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x OMPI_MCA_orte_keep_fqdn_hostnames -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python3 --model test'"
},
expectedArgs
:
[]
string
{
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 6 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-1.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x
HF_ENDPOINT -x
HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x OMPI_MCA_orte_keep_fqdn_hostnames -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python3 --model test'"
},
expectedEnv
:
[]
corev1
.
EnvVar
{
expectedEnv
:
[]
corev1
.
EnvVar
{
{
Name
:
"OMPI_MCA_orte_keep_fqdn_hostnames"
,
Value
:
"1"
},
{
Name
:
"OMPI_MCA_orte_keep_fqdn_hostnames"
,
Value
:
"1"
},
},
},
...
@@ -116,7 +116,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
...
@@ -116,7 +116,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
{
Name
:
commonconsts
.
MpiRunSshSecretName
,
MountPath
:
"/ssh-pk"
,
ReadOnly
:
true
},
{
Name
:
commonconsts
.
MpiRunSshSecretName
,
MountPath
:
"/ssh-pk"
,
ReadOnly
:
true
},
},
},
expectedCommand
:
[]
string
{
"/bin/sh"
,
"-c"
},
expectedCommand
:
[]
string
{
"/bin/sh"
,
"-c"
},
expectedArgs
:
[]
string
{
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${LWS_LEADER_ADDRESS},${LWS_WORKER_1_ADDRESS} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x OMPI_MCA_orte_keep_fqdn_hostnames -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python3 --model test'"
},
expectedArgs
:
[]
string
{
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${LWS_LEADER_ADDRESS},${LWS_WORKER_1_ADDRESS} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x
HF_ENDPOINT -x
HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x OMPI_MCA_orte_keep_fqdn_hostnames -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python3 --model test'"
},
expectedEnv
:
[]
corev1
.
EnvVar
{
expectedEnv
:
[]
corev1
.
EnvVar
{
{
Name
:
"OMPI_MCA_orte_keep_fqdn_hostnames"
,
Value
:
"1"
},
{
Name
:
"OMPI_MCA_orte_keep_fqdn_hostnames"
,
Value
:
"1"
},
},
},
...
@@ -563,7 +563,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
...
@@ -563,7 +563,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
},
},
initialArgs
:
[]
string
{
"python3"
,
"--model"
,
"test"
},
initialArgs
:
[]
string
{
"python3"
,
"--model"
,
"test"
},
initialCommand
:
[]
string
{},
initialCommand
:
[]
string
{},
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 6 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-1.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python3 --model test'"
,
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 6 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-wkr-1.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x
HF_ENDPOINT -x
HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python3 --model test'"
,
},
},
{
{
name
:
"Leader with command and no GPU resources"
,
name
:
"Leader with command and no GPU resources"
,
...
@@ -573,7 +573,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
...
@@ -573,7 +573,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
component
:
&
v1alpha1
.
DynamoComponentDeploymentOverridesSpec
{},
component
:
&
v1alpha1
.
DynamoComponentDeploymentOverridesSpec
{},
initialArgs
:
[]
string
{},
initialArgs
:
[]
string
{},
initialCommand
:
[]
string
{
"python"
,
"-m"
,
"worker"
},
initialCommand
:
[]
string
{
"python"
,
"-m"
,
"worker"
},
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 0 -H ${LWS_LEADER_ADDRESS},${LWS_WORKER_1_ADDRESS} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python -m worker'"
,
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 0 -H ${LWS_LEADER_ADDRESS},${LWS_WORKER_1_ADDRESS} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x
HF_ENDPOINT -x
HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python -m worker'"
,
},
},
{
{
name
:
"Leader with both command and args (args take precedence)"
,
name
:
"Leader with both command and args (args take precedence)"
,
...
@@ -591,7 +591,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
...
@@ -591,7 +591,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
},
},
initialArgs
:
[]
string
{
"launch"
,
"--config"
,
"test.yaml"
},
initialArgs
:
[]
string
{
"launch"
,
"--config"
,
"test.yaml"
},
initialCommand
:
[]
string
{
"ignored-command"
},
initialCommand
:
[]
string
{
"ignored-command"
},
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-wkr-0.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch launch --config test.yaml'"
,
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-wkr-0.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x
HF_ENDPOINT -x
HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch launch --config test.yaml'"
,
},
},
{
{
name
:
"Leader with all environment variables forwarded"
,
name
:
"Leader with all environment variables forwarded"
,
...
@@ -609,7 +609,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
...
@@ -609,7 +609,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
},
},
initialArgs
:
[]
string
{
"serve"
,
"--model"
,
"test"
},
initialArgs
:
[]
string
{
"serve"
,
"--model"
,
"test"
},
initialCommand
:
[]
string
{},
initialCommand
:
[]
string
{},
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-wkr-0.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch serve --model test'"
,
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-wkr-0.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x
HF_ENDPOINT -x
HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch serve --model test'"
,
},
},
{
{
name
:
"Leader with overlapping environment variables (deduplication test)"
,
name
:
"Leader with overlapping environment variables (deduplication test)"
,
...
@@ -627,7 +627,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
...
@@ -627,7 +627,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
},
},
initialArgs
:
[]
string
{
"serve"
,
"--model"
,
"test"
},
initialArgs
:
[]
string
{
"serve"
,
"--model"
,
"test"
},
initialCommand
:
[]
string
{},
initialCommand
:
[]
string
{},
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-wkr-0.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x CUSTOM_VAR -x HF_DATASETS_CACHE -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch serve --model test'"
,
expected
:
"mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *
\\
nIdentityFile ~/.ssh/id_rsa
\\
nStrictHostKeyChecking no
\\
nPort 2222
\\
n' > ~/.ssh/config && mpirun --oversubscribe -n 2 -H ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-ldr-0.${GROVE_HEADLESS_SERVICE},${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-wkr-0.${GROVE_HEADLESS_SERVICE} --mca pml ob1 --mca plm_rsh_args
\"
-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa
\"
-x CUDA_VISIBLE_DEVICES -x CUSTOM_VAR -x HF_DATASETS_CACHE -x
HF_ENDPOINT -x
HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch serve --model test'"
,
},
},
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment