Commit 876c9761 (unverified), authored by Tushar Sharma, committed by GitHub
Browse files

chore: add ephemeral-storage requests to deploy profiles (#6723)


Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>
parent 9c6f36e9
...@@ -297,7 +297,12 @@ jobs: ...@@ -297,7 +297,12 @@ jobs:
# ============================================================================ # ============================================================================
deploy-test-vllm: deploy-test-vllm:
# Run if core, vllm, or deploy is changed # Run if core, vllm, or deploy is changed
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true' # always() is needed because reusable workflows with skipped internal jobs
# (e.g. multi-gpu tests, arm64 copy) cause GitHub Actions to skip dependent
# jobs unless always() is present. !failure() ensures we still skip on real failures.
if: |
always() && !failure() &&
(needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true')
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
needs: [changed-files, deploy-operator, vllm-pipeline] needs: [changed-files, deploy-operator, vllm-pipeline]
timeout-minutes: 25 timeout-minutes: 25
...@@ -334,8 +339,10 @@ jobs: ...@@ -334,8 +339,10 @@ jobs:
deploy-test-sglang: deploy-test-sglang:
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
# Run if core, sglang, or deploy is changed # Run if core, sglang, or deploy is changed (see deploy-test-vllm for always() rationale)
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true' if: |
always() && !failure() &&
(needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true')
needs: [changed-files, deploy-operator, sglang-pipeline] needs: [changed-files, deploy-operator, sglang-pipeline]
timeout-minutes: 25 timeout-minutes: 25
permissions: permissions:
...@@ -369,8 +376,10 @@ jobs: ...@@ -369,8 +376,10 @@ jobs:
deploy-test-trtllm: deploy-test-trtllm:
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
# Run if core, trtllm, or deploy is changed # Run if core, trtllm, or deploy is changed (see deploy-test-vllm for always() rationale)
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true' if: |
always() && !failure() &&
(needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true')
needs: [changed-files, deploy-operator, trtllm-pipeline] needs: [changed-files, deploy-operator, trtllm-pipeline]
timeout-minutes: 25 timeout-minutes: 25
permissions: permissions:
......
...@@ -20,6 +20,10 @@ spec: ...@@ -20,6 +20,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
......
...@@ -23,6 +23,10 @@ spec: ...@@ -23,6 +23,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
......
...@@ -20,6 +20,10 @@ spec: ...@@ -20,6 +20,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/tensorrtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
......
...@@ -23,6 +23,10 @@ spec: ...@@ -23,6 +23,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/tensorrtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
......
...@@ -21,6 +21,10 @@ spec: ...@@ -21,6 +21,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......
...@@ -23,6 +23,10 @@ spec: ...@@ -23,6 +23,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......
...@@ -21,6 +21,10 @@ spec: ...@@ -21,6 +21,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
...@@ -42,6 +46,10 @@ spec: ...@@ -42,6 +46,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......
...@@ -23,6 +23,10 @@ spec: ...@@ -23,6 +23,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
...@@ -43,6 +47,10 @@ spec: ...@@ -43,6 +47,10 @@ spec:
resources: resources:
limits: limits:
gpu: "1" gpu: "1"
requests:
custom:
# Increase this value for larger models
ephemeral-storage: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment