Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
f8208f8d
Unverified
Commit
f8208f8d
authored
Apr 17, 2026
by
Graham King
Committed by
GitHub
Apr 17, 2026
Browse files
chore: Pin YAML pip installs to reduce supply chain risk (#8306)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
876aeb35
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
42 additions
and
43 deletions
+42
-43
examples/backends/vllm/deploy/lora/multimodal/sync-lora-job.yaml
...s/backends/vllm/deploy/lora/multimodal/sync-lora-job.yaml
+2
-2
examples/backends/vllm/deploy/lora/sync-lora-job.yaml
examples/backends/vllm/deploy/lora/sync-lora-job.yaml
+2
-2
recipes/deepseek-r1/model-cache/model-download-sglang.yaml
recipes/deepseek-r1/model-cache/model-download-sglang.yaml
+3
-3
recipes/deepseek-r1/model-cache/model-download.yaml
recipes/deepseek-r1/model-cache/model-download.yaml
+3
-3
recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/perf.yaml
recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/perf.yaml
+1
-2
recipes/deepseek-v32-fp4/model-cache/model-download.yaml
recipes/deepseek-v32-fp4/model-cache/model-download.yaml
+2
-2
recipes/glm-5-nvfp4/model-cache/model-download.yaml
recipes/glm-5-nvfp4/model-cache/model-download.yaml
+2
-2
recipes/glm-5-nvfp4/sglang/disagg/perf.yaml
recipes/glm-5-nvfp4/sglang/disagg/perf.yaml
+1
-1
recipes/gpt-oss-120b/model-cache/model-download.yaml
recipes/gpt-oss-120b/model-cache/model-download.yaml
+3
-3
recipes/kimi-k2.5/model-cache/baseten/model-download.yaml
recipes/kimi-k2.5/model-cache/baseten/model-download.yaml
+3
-3
recipes/kimi-k2.5/model-cache/nvidia/eagle-download.yaml
recipes/kimi-k2.5/model-cache/nvidia/eagle-download.yaml
+2
-2
recipes/kimi-k2.5/model-cache/nvidia/model-download.yaml
recipes/kimi-k2.5/model-cache/nvidia/model-download.yaml
+3
-3
recipes/llama-3-70b/model-cache/model-download.yaml
recipes/llama-3-70b/model-cache/model-download.yaml
+3
-3
recipes/nemotron-3-super-fp8/model-cache/model-download.yaml
recipes/nemotron-3-super-fp8/model-cache/model-download.yaml
+2
-2
recipes/qwen3-235b-a22b-fp8/model-cache/model-download.yaml
recipes/qwen3-235b-a22b-fp8/model-cache/model-download.yaml
+2
-2
recipes/qwen3-32b-fp8/model-cache/model-download.yaml
recipes/qwen3-32b-fp8/model-cache/model-download.yaml
+2
-2
recipes/qwen3-32b/model-cache/model-download.yaml
recipes/qwen3-32b/model-cache/model-download.yaml
+2
-2
recipes/qwen3-32b/vllm/agg-round-robin/perf.yaml
recipes/qwen3-32b/vllm/agg-round-robin/perf.yaml
+1
-1
recipes/qwen3-32b/vllm/disagg-kv-router/perf.yaml
recipes/qwen3-32b/vllm/disagg-kv-router/perf.yaml
+1
-1
recipes/qwen3-vl-30b/model-cache/model-download.yaml
recipes/qwen3-vl-30b/model-cache/model-download.yaml
+2
-2
No files found.
examples/backends/vllm/deploy/lora/multimodal/sync-lora-job.yaml
View file @
f8208f8d
...
@@ -15,7 +15,7 @@ spec:
...
@@ -15,7 +15,7 @@ spec:
-
-c
-
-c
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface-hub
awscli
pip install --no-cache-dir huggingface-hub
==1.11.0 awscli==1.44.80
hf download $MODEL_NAME --local-dir /tmp/lora
hf download $MODEL_NAME --local-dir /tmp/lora
rm -rf /tmp/lora/.cache
rm -rf /tmp/lora/.cache
aws --endpoint-url=http://minio:9000 s3 mb s3://$LORA_ROOT_PATH || true
aws --endpoint-url=http://minio:9000 s3 mb s3://$LORA_ROOT_PATH || true
...
@@ -35,4 +35,4 @@ spec:
...
@@ -35,4 +35,4 @@ spec:
-
name
:
MODEL_NAME
-
name
:
MODEL_NAME
value
:
Chhagan005/Chhagan-DocVL-Qwen3
value
:
Chhagan005/Chhagan-DocVL-Qwen3
restartPolicy
:
Never
restartPolicy
:
Never
backoffLimit
:
3
backoffLimit
:
3
\ No newline at end of file
examples/backends/vllm/deploy/lora/sync-lora-job.yaml
View file @
f8208f8d
...
@@ -15,7 +15,7 @@ spec:
...
@@ -15,7 +15,7 @@ spec:
-
-c
-
-c
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface-hub
awscli
pip install --no-cache-dir huggingface-hub
==1.11.0 awscli==1.44.80
hf download $MODEL_NAME --local-dir /tmp/lora
hf download $MODEL_NAME --local-dir /tmp/lora
rm -rf /tmp/lora/.cache
rm -rf /tmp/lora/.cache
aws --endpoint-url=http://minio:9000 s3 mb s3://$LORA_ROOT_PATH || true
aws --endpoint-url=http://minio:9000 s3 mb s3://$LORA_ROOT_PATH || true
...
@@ -35,4 +35,4 @@ spec:
...
@@ -35,4 +35,4 @@ spec:
-
name
:
MODEL_NAME
-
name
:
MODEL_NAME
value
:
codelion/Qwen3-0.6B-accuracy-recovery-lora
value
:
codelion/Qwen3-0.6B-accuracy-recovery-lora
restartPolicy
:
Never
restartPolicy
:
Never
backoffLimit
:
3
backoffLimit
:
3
\ No newline at end of file
recipes/deepseek-r1/model-cache/model-download-sglang.yaml
View file @
f8208f8d
...
@@ -20,14 +20,14 @@ spec:
...
@@ -20,14 +20,14 @@ spec:
image
:
python:3.10-slim
image
:
python:3.10-slim
command
:
[
"
sh"
,
"
-c"
]
command
:
[
"
sh"
,
"
-c"
]
env
:
env
:
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/opt/model-cache
value
:
/opt/model-cache
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download deepseek-ai/DeepSeek-R1
hf download deepseek-ai/DeepSeek-R1
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
@@ -35,4 +35,4 @@ spec:
...
@@ -35,4 +35,4 @@ spec:
volumes
:
volumes
:
-
name
:
model-cache
-
name
:
model-cache
persistentVolumeClaim
:
persistentVolumeClaim
:
claimName
:
model-cache
claimName
:
model-cache
\ No newline at end of file
recipes/deepseek-r1/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -20,7 +20,7 @@ spec:
...
@@ -20,7 +20,7 @@ spec:
image
:
python:3.10-slim
image
:
python:3.10-slim
command
:
[
"
sh"
,
"
-c"
]
command
:
[
"
sh"
,
"
-c"
]
env
:
env
:
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
# Optional: create with: kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN=<token> -n <namespace>
# Optional: create with: kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN=<token> -n <namespace>
-
name
:
HF_TOKEN
-
name
:
HF_TOKEN
...
@@ -32,7 +32,7 @@ spec:
...
@@ -32,7 +32,7 @@ spec:
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download nvidia/DeepSeek-R1-FP4 --local-dir /model-cache/deepseek-r1-fp4
hf download nvidia/DeepSeek-R1-FP4 --local-dir /model-cache/deepseek-r1-fp4
hf download deepseek-ai/DeepSeek-R1 --local-dir /model-cache/deepseek-r1
hf download deepseek-ai/DeepSeek-R1 --local-dir /model-cache/deepseek-r1
volumeMounts
:
volumeMounts
:
...
@@ -41,4 +41,4 @@ spec:
...
@@ -41,4 +41,4 @@ spec:
volumes
:
volumes
:
-
name
:
model-cache
-
name
:
model-cache
persistentVolumeClaim
:
persistentVolumeClaim
:
claimName
:
model-cache
claimName
:
model-cache
\ No newline at end of file
recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/perf.yaml
View file @
f8208f8d
...
@@ -30,7 +30,7 @@ spec:
...
@@ -30,7 +30,7 @@ spec:
-
-c
-
-c
-
|
-
|
apt-get update && apt-get install -y curl jq procps git && apt-get clean
apt-get update && apt-get install -y curl jq procps git && apt-get clean
pip install aiperf;
pip install aiperf
==0.6.0
;
echo "aiperf installation completed";
echo "aiperf installation completed";
sysctl -w net.ipv4.ip_local_port_range="1024 65000"
sysctl -w net.ipv4.ip_local_port_range="1024 65000"
cat /proc/sys/net/ipv4/ip_local_port_range
cat /proc/sys/net/ipv4/ip_local_port_range
...
@@ -152,4 +152,3 @@ spec:
...
@@ -152,4 +152,3 @@ spec:
-
name
:
model-cache
-
name
:
model-cache
persistentVolumeClaim
:
persistentVolumeClaim
:
claimName
:
model-cache
claimName
:
model-cache
recipes/deepseek-v32-fp4/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -32,12 +32,12 @@ spec:
...
@@ -32,12 +32,12 @@ spec:
value
:
nvidia/DeepSeek-V3.2-NVFP4
value
:
nvidia/DeepSeek-V3.2-NVFP4
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME
hf download $MODEL_NAME
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
recipes/glm-5-nvfp4/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -31,12 +31,12 @@ spec:
...
@@ -31,12 +31,12 @@ spec:
value
:
nvidia/GLM-5-NVFP4
value
:
nvidia/GLM-5-NVFP4
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME
hf download $MODEL_NAME
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
recipes/glm-5-nvfp4/sglang/disagg/perf.yaml
View file @
f8208f8d
...
@@ -53,7 +53,7 @@ spec:
...
@@ -53,7 +53,7 @@ spec:
-
|
-
|
set -eu
set -eu
apt-get update -qq && apt-get install -y -qq curl jq && apt-get clean
apt-get update -qq && apt-get install -y -qq curl jq && apt-get clean
pip install -q aiperf transformers tokenizers
pip install -q aiperf
==0.6.0
transformers
==4.57.3
tokenizers
==0.22.2
echo "Waiting for model at http://$ENDPOINT/v1/models..."
echo "Waiting for model at http://$ENDPOINT/v1/models..."
while ! curl -sf "http://$ENDPOINT/v1/models" | jq -e --arg m "$TARGET_MODEL" '.data[]? | select(.id == $m)' >/dev/null 2>&1; do
while ! curl -sf "http://$ENDPOINT/v1/models" | jq -e --arg m "$TARGET_MODEL" '.data[]? | select(.id == $m)' >/dev/null 2>&1; do
...
...
recipes/gpt-oss-120b/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -26,14 +26,14 @@ spec:
...
@@ -26,14 +26,14 @@ spec:
value
:
openai/gpt-oss-120b
value
:
openai/gpt-oss-120b
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
-
name
:
MODEL_REVISION
-
name
:
MODEL_REVISION
value
:
b5c939de8f754692c1647ca79fbf85e8c1e70f8a
value
:
b5c939de8f754692c1647ca79fbf85e8c1e70f8a
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME --revision $MODEL_REVISION --exclude "original/*" --exclude "metal/*"
hf download $MODEL_NAME --revision $MODEL_REVISION --exclude "original/*" --exclude "metal/*"
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
@@ -41,4 +41,4 @@ spec:
...
@@ -41,4 +41,4 @@ spec:
volumes
:
volumes
:
-
name
:
model-cache
-
name
:
model-cache
persistentVolumeClaim
:
persistentVolumeClaim
:
claimName
:
model-cache
claimName
:
model-cache
\ No newline at end of file
recipes/kimi-k2.5/model-cache/baseten/model-download.yaml
View file @
f8208f8d
...
@@ -26,12 +26,12 @@ spec:
...
@@ -26,12 +26,12 @@ spec:
value
:
baseten-admin/Kimi-2.5-text-nvfp4-v3
value
:
baseten-admin/Kimi-2.5-text-nvfp4-v3
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME
hf download $MODEL_NAME
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
@@ -39,4 +39,4 @@ spec:
...
@@ -39,4 +39,4 @@ spec:
volumes
:
volumes
:
-
name
:
model-cache
-
name
:
model-cache
persistentVolumeClaim
:
persistentVolumeClaim
:
claimName
:
model-cache
claimName
:
model-cache
\ No newline at end of file
recipes/kimi-k2.5/model-cache/nvidia/eagle-download.yaml
View file @
f8208f8d
...
@@ -28,12 +28,12 @@ spec:
...
@@ -28,12 +28,12 @@ spec:
value
:
0b0c6ac039089ad2c2418c91c039553381a302d9
value
:
0b0c6ac039089ad2c2418c91c039553381a302d9
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download "$MODEL_NAME" --revision "$MODEL_REVISION"
hf download "$MODEL_NAME" --revision "$MODEL_REVISION"
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
recipes/kimi-k2.5/model-cache/nvidia/model-download.yaml
View file @
f8208f8d
...
@@ -26,12 +26,12 @@ spec:
...
@@ -26,12 +26,12 @@ spec:
value
:
nvidia/Kimi-K2.5-NVFP4
value
:
nvidia/Kimi-K2.5-NVFP4
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME
hf download $MODEL_NAME
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
@@ -39,4 +39,4 @@ spec:
...
@@ -39,4 +39,4 @@ spec:
volumes
:
volumes
:
-
name
:
model-cache
-
name
:
model-cache
persistentVolumeClaim
:
persistentVolumeClaim
:
claimName
:
model-cache
claimName
:
model-cache
\ No newline at end of file
recipes/llama-3-70b/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -26,14 +26,14 @@ spec:
...
@@ -26,14 +26,14 @@ spec:
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
-
name
:
MODEL_REVISION
-
name
:
MODEL_REVISION
value
:
ddb4128556dfcff99e0c41aee159ea6c3e655dcd
value
:
ddb4128556dfcff99e0c41aee159ea6c3e655dcd
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME --revision $MODEL_REVISION
hf download $MODEL_NAME --revision $MODEL_REVISION
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
@@ -41,4 +41,4 @@ spec:
...
@@ -41,4 +41,4 @@ spec:
volumes
:
volumes
:
-
name
:
model-cache
-
name
:
model-cache
persistentVolumeClaim
:
persistentVolumeClaim
:
claimName
:
model-cache
claimName
:
model-cache
\ No newline at end of file
recipes/nemotron-3-super-fp8/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -26,12 +26,12 @@ spec:
...
@@ -26,12 +26,12 @@ spec:
value
:
nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8
value
:
nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME
hf download $MODEL_NAME
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
recipes/qwen3-235b-a22b-fp8/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -26,14 +26,14 @@ spec:
...
@@ -26,14 +26,14 @@ spec:
value
:
Qwen/Qwen3-235B-A22B-FP8
value
:
Qwen/Qwen3-235B-A22B-FP8
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
-
name
:
MODEL_REVISION
-
name
:
MODEL_REVISION
value
:
39eb2b067ea6b8e3e1dd97d3cd0c7ffeaf3e1a35
value
:
39eb2b067ea6b8e3e1dd97d3cd0c7ffeaf3e1a35
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME --revision $MODEL_REVISION
hf download $MODEL_NAME --revision $MODEL_REVISION
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
recipes/qwen3-32b-fp8/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -26,14 +26,14 @@ spec:
...
@@ -26,14 +26,14 @@ spec:
value
:
Qwen/Qwen3-32B-FP8
value
:
Qwen/Qwen3-32B-FP8
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/model-store
value
:
/model-store
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
-
name
:
MODEL_REVISION
-
name
:
MODEL_REVISION
value
:
aa55da1ecc13d006e8b8e4f54579b1ea8c3db2df
value
:
aa55da1ecc13d006e8b8e4f54579b1ea8c3db2df
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME --revision $MODEL_REVISION
hf download $MODEL_NAME --revision $MODEL_REVISION
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
recipes/qwen3-32b/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -26,14 +26,14 @@ spec:
...
@@ -26,14 +26,14 @@ spec:
value
:
"
Qwen/Qwen3-32B"
value
:
"
Qwen/Qwen3-32B"
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/home/dynamo/.cache/huggingface
value
:
/home/dynamo/.cache/huggingface
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
-
name
:
MODEL_REVISION
-
name
:
MODEL_REVISION
value
:
9216db5781bf21249d130ec9da846c4624c16137
value
:
9216db5781bf21249d130ec9da846c4624c16137
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download $MODEL_NAME --revision $MODEL_REVISION
hf download $MODEL_NAME --revision $MODEL_REVISION
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
recipes/qwen3-32b/vllm/agg-round-robin/perf.yaml
View file @
f8208f8d
...
@@ -20,7 +20,7 @@ spec:
...
@@ -20,7 +20,7 @@ spec:
apt update && apt install tmux wget curl jq -y
apt update && apt install tmux wget curl jq -y
# Install benchmarking tool
# Install benchmarking tool
pip install aiperf
pip install aiperf
==0.6.0
# Wait for model to be ready
# Wait for model to be ready
echo "Waiting for model '${MODEL_NAME}' at http://${FRONTEND}:8000/v1/models..."
echo "Waiting for model '${MODEL_NAME}' at http://${FRONTEND}:8000/v1/models..."
...
...
recipes/qwen3-32b/vllm/disagg-kv-router/perf.yaml
View file @
f8208f8d
...
@@ -20,7 +20,7 @@ spec:
...
@@ -20,7 +20,7 @@ spec:
apt update && apt install tmux wget curl jq -y
apt update && apt install tmux wget curl jq -y
# Install benchmarking tool
# Install benchmarking tool
pip install aiperf
pip install aiperf
==0.6.0
# Wait for model to be ready
# Wait for model to be ready
echo "Waiting for model '${MODEL_NAME}' at http://${FRONTEND}:8000/v1/models..."
echo "Waiting for model '${MODEL_NAME}' at http://${FRONTEND}:8000/v1/models..."
...
...
recipes/qwen3-vl-30b/model-cache/model-download.yaml
View file @
f8208f8d
...
@@ -26,14 +26,14 @@ spec:
...
@@ -26,14 +26,14 @@ spec:
value
:
"
Qwen/Qwen3-VL-30B-A3B-Instruct-FP8"
# Remove FP8 for BF16 variant
value
:
"
Qwen/Qwen3-VL-30B-A3B-Instruct-FP8"
# Remove FP8 for BF16 variant
-
name
:
HF_HOME
-
name
:
HF_HOME
value
:
/home/dynamo/.cache/huggingface
value
:
/home/dynamo/.cache/huggingface
-
name
:
HF_
HUB_ENABLE_HF_TRANSFER
-
name
:
HF_
XET_HIGH_PERFORMANCE
value
:
"
1"
value
:
"
1"
-
name
:
MODEL_REVISION
-
name
:
MODEL_REVISION
value
:
"
main"
value
:
"
main"
args
:
args
:
-
|
-
|
set -eux
set -eux
pip install --no-cache-dir huggingface_hub
hf_transfer
pip install --no-cache-dir huggingface_hub
==1.11.0
hf download "$MODEL_NAME" --revision "$MODEL_REVISION"
hf download "$MODEL_NAME" --revision "$MODEL_REVISION"
volumeMounts
:
volumeMounts
:
-
name
:
model-cache
-
name
:
model-cache
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment