Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bf21cc03
"docs/vscode:/vscode.git/clone" did not exist on "fe718fd29545dfdaf971c73ffafe3ccb06a25899"
Unverified
Commit
bf21cc03
authored
Nov 13, 2025
by
Thomas Montfort
Committed by
GitHub
Nov 13, 2025
Browse files
fix: llama3-70-b-agg recipe model download failure (#4290)
parent
f817c595
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
45 additions
and
18 deletions
+45
-18
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+4
-2
recipes/llama-3-70b/vllm/agg/deploy.yaml
recipes/llama-3-70b/vllm/agg/deploy.yaml
+8
-3
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
+12
-5
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
+12
-5
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
+3
-1
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
+6
-2
No files found.
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
View file @
bf21cc03
...
@@ -54,7 +54,7 @@ spec:
...
@@ -54,7 +54,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -92,7 +92,9 @@ spec:
...
@@ -92,7 +92,9 @@ spec:
-
name
:
ENGINE_ARGS
-
name
:
ENGINE_ARGS
value
:
"
/opt/dynamo/configs/config.yaml"
value
:
"
/opt/dynamo/configs/config.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
value
:
"
/opt/models/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
-
name
:
HF_HOME
value
:
/opt/models
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config
name
:
llm-config
...
...
recipes/llama-3-70b/vllm/agg/deploy.yaml
View file @
bf21cc03
...
@@ -15,11 +15,14 @@ spec:
...
@@ -15,11 +15,14 @@ spec:
dynamoNamespace
:
llama3-70b-agg
dynamoNamespace
:
llama3-70b-agg
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
extraPodSpec
:
extraPodSpec
:
mainContainer
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/examples/backends/vllm
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
value
:
/opt/models
replicas
:
1
replicas
:
1
VllmPrefillWorker
:
VllmPrefillWorker
:
componentType
:
worker
componentType
:
worker
...
@@ -27,7 +30,7 @@ spec:
...
@@ -27,7 +30,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
20Gi
size
:
20Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -36,7 +39,9 @@ spec:
...
@@ -36,7 +39,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
...
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
View file @
bf21cc03
...
@@ -15,11 +15,14 @@ spec:
...
@@ -15,11 +15,14 @@ spec:
dynamoNamespace
:
llama3-70b-disagg-mn
dynamoNamespace
:
llama3-70b-disagg-mn
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
extraPodSpec
:
extraPodSpec
:
mainContainer
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/examples/backends/vllm
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
value
:
/opt/models
replicas
:
1
replicas
:
1
VllmPrefillWorker
:
VllmPrefillWorker
:
componentType
:
worker
componentType
:
worker
...
@@ -27,7 +30,7 @@ spec:
...
@@ -27,7 +30,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -36,7 +39,9 @@ spec:
...
@@ -36,7 +39,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
@@ -56,7 +61,7 @@ spec:
...
@@ -56,7 +61,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -65,7 +70,9 @@ spec:
...
@@ -65,7 +70,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
8
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
...
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
View file @
bf21cc03
...
@@ -15,11 +15,14 @@ spec:
...
@@ -15,11 +15,14 @@ spec:
dynamoNamespace
:
llama3-70b-disagg-sn
dynamoNamespace
:
llama3-70b-disagg-sn
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
extraPodSpec
:
extraPodSpec
:
mainContainer
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/examples/backends/vllm
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
value
:
/opt/models
replicas
:
1
replicas
:
1
VllmPrefillWorker
:
VllmPrefillWorker
:
componentType
:
worker
componentType
:
worker
...
@@ -27,7 +30,7 @@ spec:
...
@@ -27,7 +30,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -48,7 +51,9 @@ spec:
...
@@ -48,7 +51,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
2
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
2
--data-parallel-size
1
--disable-log-requests
--is-prefill-worker
--gpu-memory-utilization
0.95
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
@@ -68,7 +73,7 @@ spec:
...
@@ -68,7 +73,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -89,7 +94,9 @@ spec:
...
@@ -89,7 +94,9 @@ spec:
-
name
:
SERVED_MODEL_NAME
-
name
:
SERVED_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
/root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
value
:
"
/opt/models/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd"
-
name
:
HF_HOME
value
:
/opt/models
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
4
--data-parallel-size
1
--disable-log-requests
--gpu-memory-utilization
0.90
--no-enable-prefix-caching
--block-size
128"
command
:
command
:
...
...
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
View file @
bf21cc03
...
@@ -70,7 +70,7 @@ spec:
...
@@ -70,7 +70,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -106,6 +106,8 @@ spec:
...
@@ -106,6 +106,8 @@ spec:
value
:
"
/opt/dynamo/configs/config.yaml"
value
:
"
/opt/dynamo/configs/config.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-32B-FP8"
value
:
"
Qwen/Qwen3-32B-FP8"
-
name
:
HF_HOME
value
:
"
/opt/models"
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config
name
:
llm-config
...
...
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
View file @
bf21cc03
...
@@ -228,7 +228,7 @@ spec:
...
@@ -228,7 +228,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -265,6 +265,8 @@ spec:
...
@@ -265,6 +265,8 @@ spec:
value
:
"
/opt/dynamo/configs/config-prefill.yaml"
value
:
"
/opt/dynamo/configs/config-prefill.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-32B-FP8"
value
:
"
Qwen/Qwen3-32B-FP8"
-
name
:
HF_HOME
value
:
"
/opt/models"
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config-prefill
name
:
llm-config-prefill
...
@@ -287,7 +289,7 @@ spec:
...
@@ -287,7 +289,7 @@ spec:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
mountPoint
:
/
root/.cache/huggingface
mountPoint
:
/
opt/models
sharedMemory
:
sharedMemory
:
size
:
80Gi
size
:
80Gi
extraPodSpec
:
extraPodSpec
:
...
@@ -324,6 +326,8 @@ spec:
...
@@ -324,6 +326,8 @@ spec:
value
:
"
/opt/dynamo/configs/config-decode.yaml"
value
:
"
/opt/dynamo/configs/config-decode.yaml"
-
name
:
MODEL_PATH
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-32B-FP8"
value
:
"
Qwen/Qwen3-32B-FP8"
-
name
:
HF_HOME
value
:
"
/opt/models"
volumeMounts
:
volumeMounts
:
-
mountPath
:
/opt/dynamo/configs
-
mountPath
:
/opt/dynamo/configs
name
:
llm-config-decode
name
:
llm-config-decode
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment