chore: add ephemeral-storage requests to deploy profiles (#6723)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>

chore: add ephemeral-storage requests to deploy profiles (#6723)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>
876c9761 · Tushar Sharma · GitHub · 9c6f36e9 · 876c9761 · 876c9761
Unverified Commit 876c9761 authored Mar 05, 2026 by Tushar Sharma Committed by GitHub Mar 05, 2026
9 changed files
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -297,7 +297,12 @@ jobs:
   # ============================================================================
  deploy-test-vllm:
    # Run if core, vllm, or deploy is changed
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    # An explicit status check is needed because reusable workflows with skipped
+    # internal jobs (e.g. multi-gpu tests, arm64 copy) otherwise cause this job to be
+    # skipped. !cancelled() && !failure() still skips on cancellation and real failures.
+    if: |
+      !cancelled() && !failure() &&
+      (needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true')
    runs-on: prod-default-small-v2
    needs: [changed-files, deploy-operator, vllm-pipeline]
    timeout-minutes: 25
@@ -334,8 +339,10 @@ jobs:
  deploy-test-sglang:
    runs-on: prod-default-small-v2
-    # Run if core, sglang, or deploy is changed
+    # Run if core, sglang, or deploy is changed (see deploy-test-vllm for the status-check rationale)
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true'
+    if: |
+      !cancelled() && !failure() &&
+      (needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true')
    needs: [changed-files, deploy-operator, sglang-pipeline]
    timeout-minutes: 25
    permissions:
@@ -369,8 +376,10 @@ jobs:
  deploy-test-trtllm:
    runs-on: prod-default-small-v2
-    # Run if core, trtllm, or deploy is changed
+    # Run if core, trtllm, or deploy is changed (see deploy-test-vllm for the status-check rationale)
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    if: |
+      !cancelled() && !failure() &&
+      (needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true')
    needs: [changed-files, deploy-operator, trtllm-pipeline]
    timeout-minutes: 25
    permissions:

--- a/examples/backends/sglang/deploy/agg.yaml
+++ b/examples/backends/sglang/deploy/agg.yaml
@@ -20,6 +20,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag

--- a/examples/backends/sglang/deploy/agg_router.yaml
+++ b/examples/backends/sglang/deploy/agg_router.yaml
@@ -23,6 +23,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag

--- a/examples/backends/trtllm/deploy/agg.yaml
+++ b/examples/backends/trtllm/deploy/agg.yaml
@@ -20,6 +20,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: my-registry/tensorrtllm-runtime:my-tag

--- a/examples/backends/trtllm/deploy/agg_router.yaml
+++ b/examples/backends/trtllm/deploy/agg_router.yaml
@@ -23,6 +23,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: my-registry/tensorrtllm-runtime:my-tag

--- a/examples/backends/vllm/deploy/agg.yaml
+++ b/examples/backends/vllm/deploy/agg.yaml
@@ -21,6 +21,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag

--- a/examples/backends/vllm/deploy/agg_router.yaml
+++ b/examples/backends/vllm/deploy/agg_router.yaml
@@ -23,6 +23,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag

--- a/examples/backends/vllm/deploy/disagg.yaml
+++ b/examples/backends/vllm/deploy/disagg.yaml
@@ -21,6 +21,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
@@ -42,6 +46,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag

--- a/examples/backends/vllm/deploy/disagg_router.yaml
+++ b/examples/backends/vllm/deploy/disagg_router.yaml
@@ -23,6 +23,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
@@ -43,6 +47,10 @@ spec:
      resources:
        limits:
          gpu: "1"
+        requests:
+          custom:
+            # Increase this value for larger models
+            ephemeral-storage: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag