Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bf21cc03
Unverified
Commit
bf21cc03
authored
Nov 13, 2025
by
Thomas Montfort
Committed by
GitHub
Nov 13, 2025
Browse files
fix: llama3-70-b-agg recipe model download failure (#4290)
parent
f817c595
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
45 additions
and
18 deletions
+45
-18
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+4
-2
recipes/llama-3-70b/vllm/agg/deploy.yaml
recipes/llama-3-70b/vllm/agg/deploy.yaml
+8
-3
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
+12
-5
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
+12
-5
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
+3
-1
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
+6
-2
No files found.
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
View file @
bf21cc03
...
@@ -54,7 +54,7 @@ spec:
...
@@ -54,7 +54,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -92,7 +92,9 @@ spec:
...
@@ -92,7 +92,9 @@ spec:
-
name
:
ENGINE_ARGS
-
name
:
ENGINE_ARGS
value
:
"
/opt/dynamo/configs/config.yaml"
value
:
"
/opt/dynamo/configs/config.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
value
:
"
/opt/models/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
-
name
:
HF_HOME
value
:
/opt/models
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config
name
:
llm-config
...
...
recipes/llama-3-70b/vllm/agg/deploy.yaml
View file @
bf21cc03
...
@@ -15,11 +15,14 @@ spec:
...
@@ -15,11 +15,14 @@ spec:
dynamoNamespace
:
llama3-70b-agg
dynamoNamespace
:
llama3-70b-agg
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
extraPodSpec
:
extraPodSpec
:
mainContainer
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/examples/backends/vllm
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
value
:
/opt/models
replicas
:
1
replicas
:
1
VllmPrefillWorker
:
VllmPrefillWorker
:
componentType
:
worker
componentType
:
worker
...
@@ -27,7 +30,7 @@ spec:
...
@@ -27,7 +30,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
20Gi
size
:
20Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -36,7 +39,9 @@ spec:
...
@@ -36,7 +39,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
...
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
View file @
bf21cc03
...
@@ -15,11 +15,14 @@ spec:
...
@@ -15,11 +15,14 @@ spec:
dynamoNamespace
:
llama3-70b-disagg-mn
dynamoNamespace
:
llama3-70b-disagg-mn
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
extraPodSpec
:
extraPodSpec
:
mainContainer
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/examples/backends/vllm
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
value
:
/opt/models
replicas
:
1
replicas
:
1
VllmPrefillWorker
:
VllmPrefillWorker
:
componentType
:
worker
componentType
:
worker
...
@@ -27,7 +30,7 @@ spec:
...
@@ -27,7 +30,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -36,7 +39,9 @@ spec:
...
@@ -36,7 +39,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
@@ -56,7 +61,7 @@ spec:
...
@@ -56,7 +61,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -65,7 +70,9 @@ spec:
...
@@ -65,7 +70,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
...
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
View file @
bf21cc03
...
@@ -15,11 +15,14 @@ spec:
...
@@ -15,11 +15,14 @@ spec:
dynamoNamespace
:
llama3-70b-disagg-sn
dynamoNamespace
:
llama3-70b-disagg-sn
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
extraPodSpec
:
extraPodSpec
:
mainContainer
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/examples/backends/vllm
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
value
:
/opt/models
replicas
:
1
replicas
:
1
VllmPrefillWorker
:
VllmPrefillWorker
:
componentType
:
worker
componentType
:
worker
...
@@ -27,7 +30,7 @@ spec:
...
@@ -27,7 +30,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -48,7 +51,9 @@ spec:
...
@@ -48,7 +51,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
2
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
2
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
@@ -68,7 +73,7 @@ spec:
...
@@ -68,7 +73,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -89,7 +94,9 @@ spec:
...
@@ -89,7 +94,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
...
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
View file @
bf21cc03
...
@@ -70,7 +70,7 @@ spec:
...
@@ -70,7 +70,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -106,6 +106,8 @@ spec:
...
@@ -106,6 +106,8 @@ spec:
value
:
"
/opt/dynamo/configs/config.yaml"
value
:
"
/opt/dynamo/configs/config.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-32B-FP8"
value
:
"
Qwen/Qwen3-32B-FP8"
-
name
:
HF_HOME
value
:
"
/opt/models"
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config
name
:
llm-config
...
...
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
View file @
bf21cc03
...
@@ -228,7 +228,7 @@ spec:
...
@@ -228,7 +228,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -265,6 +265,8 @@ spec:
...
@@ -265,6 +265,8 @@ spec:
value
:
"
/opt/dynamo/configs/config-prefill.yaml"
value
:
"
/opt/dynamo/configs/config-prefill.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-32B-FP8"
value
:
"
Qwen/Qwen3-32B-FP8"
-
name
:
HF_HOME
value
:
"
/opt/models"
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config-prefill
name
:
llm-config-prefill
...
@@ -287,7 +289,7 @@ spec:
...
@@ -287,7 +289,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -324,6 +326,8 @@ spec:
...
@@ -324,6 +326,8 @@ spec:
value
:
"
/opt/dynamo/configs/config-decode.yaml"
value
:
"
/opt/dynamo/configs/config-decode.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-32B-FP8"
value
:
"
Qwen/Qwen3-32B-FP8"
-
name
:
HF_HOME
value
:
"
/opt/models"
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config-decode
name
:
llm-config-decode
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment