ci: enable EFA builds in post-merge and release pipelines (#6650)

Signed-off-by: Anant Sharma <anants@nvidia.com>

ci: enable EFA builds in post-merge and release pipelines (#6650)
Signed-off-by: Anant Sharma <anants@nvidia.com>
a110abfb · Anant Sharma · GitHub · 57b55d88 · a110abfb · a110abfb
Unverified commit a110abfb, authored Feb 27, 2026 by Anant Sharma; committed by GitHub on Feb 27, 2026.
4 changed files
--- a/.github/workflows/build-test-distribute-flavor-matrix.yml
+++ b/.github/workflows/build-test-distribute-flavor-matrix.yml
@@ -103,6 +103,11 @@ on:
        required: false
        type: boolean
        default: true
+      make_efa:
+        description: 'Enable AWS EFA support in the build'
+        required: false
+        type: boolean
+        default: false
    secrets:
      AWS_DEFAULT_REGION:
        required: true
@@ -156,4 +161,5 @@ jobs:
      multi_gpu_test_timeout_minutes: ${{ inputs.multi_gpu_test_timeout_minutes }}
      copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
      copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }}
+      make_efa: ${{ inputs.make_efa }}
    secrets: inherit
--- a/.github/workflows/build-test-distribute-flavor.yml
+++ b/.github/workflows/build-test-distribute-flavor.yml
@@ -113,6 +113,11 @@ on:
        required: false
        type: boolean
        default: false
+      make_efa:
+        description: 'Enable AWS EFA support in the build'
+        required: false
+        type: boolean
+        default: false
    secrets:
      AWS_DEFAULT_REGION:
        required: true
@@ -191,7 +196,11 @@ jobs:
        run: |
          CUDA_VERSION_RAW=${{ inputs.cuda_version }}
          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
-          TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}"
+          EFA_SUFFIX=""
+          if [ "${{ inputs.make_efa }}" == "true" ]; then
+            EFA_SUFFIX="-efa"
+          fi
+          TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}${EFA_SUFFIX}"
          DEFAULT_TARGET_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}"
          echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
          echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
@@ -219,11 +228,16 @@ jobs:
        shell: bash
        run: |
          echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
+          MAKE_EFA_FLAG=""
+          if [ "${{ inputs.make_efa }}" == "true" ]; then
+            MAKE_EFA_FLAG="--make-efa"
+          fi
          python ./container/render.py \
              --target=${{ inputs.target }} \
              --framework=${{ inputs.framework }} \
              --platform=${{ inputs.platform }} \
              --cuda-version=${{ inputs.cuda_version }} \
+              ${MAKE_EFA_FLAG} \
              --show-result \
              --output-short-filename
      - name: Build Container

--- a/.github/workflows/post-merge-ci.yml
+++ b/.github/workflows/post-merge-ci.yml
@@ -84,12 +84,61 @@ jobs:
      multi_gpu_test_timeout_minutes: 60
    secrets: inherit

+  # ============================================================================
+  # EFA PIPELINES (amd64 build; GPU tests and ACR copy disabled)
+  # ============================================================================
+  # ============================================================================
+  # VLLM EFA PIPELINE
+  # ============================================================================
+  vllm-efa-pipeline:
+    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    with:
+      framework: vllm
+      target: runtime
+      platforms: '["amd64"]'
+      cuda_versions: '["12.9"]'
+      make_efa: true
+      extra_tags: |
+        ${{ github.ref_name == 'main' && 'main-vllm-efa' || '' }}
+        ${{ github.ref_name == 'main' && format('main-vllm-efa-{0}', github.sha) || '' }}
+      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
+      build_timeout_minutes: ${{ github.ref_name == 'main' && 120 || 60 }}
+      cpu_only_test_markers: '(pre_merge or post_merge) and vllm and gpu_0'
+      cpu_only_test_timeout_minutes: 60
+      run_single_gpu_tests: false
+      run_multi_gpu_tests: false
+      copy_to_acr: false
+    secrets: inherit
+
+  # ============================================================================
+  # TRTLLM EFA PIPELINE
+  # ============================================================================
+  trtllm-efa-pipeline:
+    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    with:
+      framework: trtllm
+      target: runtime
+      platforms: '["amd64"]'
+      cuda_versions: '["13.1"]'
+      make_efa: true
+      extra_tags: |
+        ${{ github.ref_name == 'main' && 'main-trtllm-efa' || '' }}
+        ${{ github.ref_name == 'main' && format('main-trtllm-efa-{0}', github.sha) || '' }}
+      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
+      build_timeout_minutes: ${{ github.ref_name == 'main' && 120 || 60 }}
+      cpu_only_test_markers: '(pre_merge or post_merge) and trtllm and gpu_0'
+      cpu_only_test_timeout_minutes: 60
+      run_single_gpu_tests: false
+      run_multi_gpu_tests: false
+      copy_to_acr: false
+    secrets: inherit
+
  ############################## SLACK NOTIFICATION ##############################
  notify-slack:
    name: Notify Slack
    runs-on: prod-builder-amd-v1
    if: always() && failure()
-    needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline ]
+    needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline ]
    permissions:
      contents: read
    steps:

--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -157,6 +157,60 @@ jobs:
      multi_gpu_test_timeout_minutes: 60
    secrets: inherit

+  # ============================================================================
+  # EFA PIPELINES (amd64 build; GPU tests and ACR copy disabled)
+  # ============================================================================
+
+  vllm-efa-pipeline:
+    name: vllm EFA builds
+    needs: [prepare-release, manual-approval]
+    if: |
+      always() &&
+      needs.prepare-release.result == 'success' &&
+      (github.event_name == 'push' || needs.manual-approval.result == 'success')
+    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    with:
+      framework: vllm
+      target: runtime
+      platforms: '["amd64"]'
+      cuda_versions: '["12.9"]'
+      make_efa: true
+      extra_tags: |
+        ${{ needs.prepare-release.outputs.image_prefix }}-vllm-efa
+      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
+      build_timeout_minutes: 120
+      cpu_only_test_markers: '(pre_merge or post_merge) and vllm and gpu_0'
+      cpu_only_test_timeout_minutes: 60
+      run_single_gpu_tests: false
+      run_multi_gpu_tests: false
+      copy_to_acr: false
+    secrets: inherit
+
+  trtllm-efa-pipeline:
+    name: trtllm EFA builds
+    needs: [prepare-release, manual-approval]
+    if: |
+      always() &&
+      needs.prepare-release.result == 'success' &&
+      (github.event_name == 'push' || needs.manual-approval.result == 'success')
+    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    with:
+      framework: trtllm
+      target: runtime
+      platforms: '["amd64"]'
+      cuda_versions: '["13.1"]'
+      make_efa: true
+      extra_tags: |
+        ${{ needs.prepare-release.outputs.image_prefix }}-trtllm-efa
+      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
+      build_timeout_minutes: 120
+      cpu_only_test_markers: '(pre_merge or post_merge) and trtllm and gpu_0'
+      cpu_only_test_timeout_minutes: 60
+      run_single_gpu_tests: false
+      run_multi_gpu_tests: false
+      copy_to_acr: false
+    secrets: inherit
+
  # ============================================================================
  # RELEASE-SPECIFIC BUILDS
  # ============================================================================
@@ -238,7 +292,7 @@ jobs:
    name: Clean K8s builder if exists
    runs-on: prod-default-small-v2
    if: always()
-    needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline]
+    needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline]
    steps:
    - uses: actions/checkout@v4
    - name: Create K8s builders (skip bootstrap)
@@ -406,7 +460,7 @@ jobs:

  release-publish:
    name: Tag RC & Publish to NGC
-    needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator-build, frontend-build]
+    needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline, operator-build, frontend-build]
    if: |
      always() && !cancelled() &&
      needs.prepare-release.result == 'success' &&
@@ -590,6 +644,20 @@ jobs:
              "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13"
          done

+          # ---- EFA runtime images (amd64 only, no multi-arch manifest needed) ----
+          echo ""
+          echo "=== EFA Runtime Images ==="
+
+          # vllm EFA (CUDA 12, amd64 only)
+          SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-vllm-efa-cuda12-amd64"
+          TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/vllm-runtime:${NGC_VERSION_TAG}-efa"
+          copy_image "${SOURCE}" "${TARGET}" "vllm-runtime:${NGC_VERSION_TAG}-efa"
+
+          # trtllm EFA (CUDA 13, amd64 only)
+          SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-trtllm-efa-cuda13-amd64"
+          TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
+          copy_image "${SOURCE}" "${TARGET}" "tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
+
          # ---- Frontend images ----
          echo ""
          echo "=== Frontend Images ==="
@@ -713,6 +781,10 @@ jobs:
          echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
          echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "EFA runtime images (amd64 only):" >> $GITHUB_STEP_SUMMARY
+          echo "- \`vllm-runtime:${NGC_VERSION_TAG}-efa\`" >> $GITHUB_STEP_SUMMARY
+          echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-efa\`" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Operator image:" >> $GITHUB_STEP_SUMMARY
          echo "- \`kubernetes-operator:${NGC_VERSION_TAG}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY