Commit a110abfb (unverified), authored by Anant Sharma, committed by GitHub
Browse files

ci: enable EFA builds in post-merge and release pipelines (#6650)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 57b55d88
......@@ -103,6 +103,11 @@ on:
required: false
type: boolean
default: true
# Optional boolean input, off by default. This workflow appears to forward it
# unchanged to the job it calls (see the `make_efa:` entry passed in `with:`).
make_efa:
description: 'Enable AWS EFA support in the build'
required: false
type: boolean
default: false
secrets:
AWS_DEFAULT_REGION:
required: true
......@@ -156,4 +161,5 @@ jobs:
multi_gpu_test_timeout_minutes: ${{ inputs.multi_gpu_test_timeout_minutes }}
copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }}
make_efa: ${{ inputs.make_efa }}
secrets: inherit
......@@ -113,6 +113,11 @@ on:
required: false
type: boolean
default: false
# Optional boolean input, off by default. When it evaluates to "true", the
# tag-computation step appends "-efa" to the plain image tag and the
# Dockerfile-rendering step passes --make-efa to container/render.py.
make_efa:
description: 'Enable AWS EFA support in the build'
required: false
type: boolean
default: false
secrets:
AWS_DEFAULT_REGION:
required: true
......@@ -191,7 +196,11 @@ jobs:
run: |
CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}"
EFA_SUFFIX=""
if [ "${{ inputs.make_efa }}" == "true" ]; then
EFA_SUFFIX="-efa"
fi
TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}${EFA_SUFFIX}"
DEFAULT_TARGET_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}"
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
......@@ -219,11 +228,16 @@ jobs:
shell: bash
run: |
echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
MAKE_EFA_FLAG=""
if [ "${{ inputs.make_efa }}" == "true" ]; then
MAKE_EFA_FLAG="--make-efa"
fi
python ./container/render.py \
--target=${{ inputs.target }} \
--framework=${{ inputs.framework }} \
--platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \
${MAKE_EFA_FLAG} \
--show-result \
--output-short-filename
- name: Build Container
......
......@@ -84,12 +84,61 @@ jobs:
multi_gpu_test_timeout_minutes: 60
secrets: inherit
# ============================================================================
# EFA PIPELINES (Build only, amd64)
# ============================================================================
# ============================================================================
# VLLM EFA PIPELINE
# ============================================================================
# EFA variant of the vLLM runtime build: calls the shared flavor-matrix
# workflow with make_efa enabled, limited to amd64 and CUDA 12.9.
vllm-efa-pipeline:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: vllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["12.9"]'
# Switches on the make_efa input of the called workflow.
make_efa: true
# Both extra tags are only non-empty when the ref is main; on any other ref
# each expression evaluates to an empty string.
extra_tags: |
${{ github.ref_name == 'main' && 'main-vllm-efa' || '' }}
${{ github.ref_name == 'main' && format('main-vllm-efa-{0}', github.sha) || '' }}
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
# 120 minutes on main, 60 minutes on any other ref.
build_timeout_minutes: ${{ github.ref_name == 'main' && 120 || 60 }}
# NOTE(review): the section banner says "Build only", yet CPU-only test
# markers are still passed; whether those tests run is decided by the called
# workflow - confirm.
cpu_only_test_markers: '(pre_merge or post_merge) and vllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# GPU test stages and the ACR copy are disabled for this variant.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
# ============================================================================
# TRTLLM EFA PIPELINE
# ============================================================================
# EFA variant of the TensorRT-LLM runtime build: calls the shared
# flavor-matrix workflow with make_efa enabled, limited to amd64 and CUDA 13.1.
trtllm-efa-pipeline:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: trtllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["13.1"]'
# Switches on the make_efa input of the called workflow.
make_efa: true
# Both extra tags are only non-empty when the ref is main; on any other ref
# each expression evaluates to an empty string.
extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm-efa' || '' }}
${{ github.ref_name == 'main' && format('main-trtllm-efa-{0}', github.sha) || '' }}
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
# 120 minutes on main, 60 minutes on any other ref.
build_timeout_minutes: ${{ github.ref_name == 'main' && 120 || 60 }}
# NOTE(review): the section banner says "Build only", yet CPU-only test
# markers are still passed; whether those tests run is decided by the called
# workflow - confirm.
cpu_only_test_markers: '(pre_merge or post_merge) and trtllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# GPU test stages and the ACR copy are disabled for this variant.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
############################## SLACK NOTIFICATION ##############################
notify-slack:
name: Notify Slack
runs-on: prod-builder-amd-v1
if: always() && failure()
needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline ]
needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline ]
permissions:
contents: read
steps:
......
......@@ -157,6 +157,60 @@ jobs:
multi_gpu_test_timeout_minutes: 60
secrets: inherit
# ============================================================================
# EFA PIPELINES (Build only, amd64)
# ============================================================================
# Release-pipeline EFA build of the vLLM runtime (amd64, CUDA 12.9) via the
# shared flavor-matrix workflow.
vllm-efa-pipeline:
name: vllm EFA builds
needs: [prepare-release, manual-approval]
# Runs only when prepare-release succeeded and either the trigger is a push
# or manual-approval succeeded. always() is presumably there so the condition
# is still evaluated when manual-approval did not run - confirm.
if: |
always() &&
needs.prepare-release.result == 'success' &&
(github.event_name == 'push' || needs.manual-approval.result == 'success')
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: vllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["12.9"]'
# Switches on the make_efa input of the called workflow.
make_efa: true
# Tag is prepare-release's image_prefix output plus "-vllm-efa".
# NOTE(review): the publish step copies "${IMAGE_PREFIX}-vllm-efa-cuda12-amd64";
# presumably the called workflow appends the cuda/platform suffix - confirm.
extra_tags: |
${{ needs.prepare-release.outputs.image_prefix }}-vllm-efa
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 120
# NOTE(review): the section banner says "Build only", yet CPU-only test
# markers are still passed; whether those tests run is decided by the called
# workflow - confirm.
cpu_only_test_markers: '(pre_merge or post_merge) and vllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# GPU test stages and the ACR copy are disabled for this variant.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
# Release-pipeline EFA build of the TensorRT-LLM runtime (amd64, CUDA 13.1)
# via the shared flavor-matrix workflow.
trtllm-efa-pipeline:
name: trtllm EFA builds
needs: [prepare-release, manual-approval]
# Runs only when prepare-release succeeded and either the trigger is a push
# or manual-approval succeeded. always() is presumably there so the condition
# is still evaluated when manual-approval did not run - confirm.
if: |
always() &&
needs.prepare-release.result == 'success' &&
(github.event_name == 'push' || needs.manual-approval.result == 'success')
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: trtllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["13.1"]'
# Switches on the make_efa input of the called workflow.
make_efa: true
# Tag is prepare-release's image_prefix output plus "-trtllm-efa".
# NOTE(review): the publish step copies "${IMAGE_PREFIX}-trtllm-efa-cuda13-amd64";
# presumably the called workflow appends the cuda/platform suffix - confirm.
extra_tags: |
${{ needs.prepare-release.outputs.image_prefix }}-trtllm-efa
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 120
# NOTE(review): the section banner says "Build only", yet CPU-only test
# markers are still passed; whether those tests run is decided by the called
# workflow - confirm.
cpu_only_test_markers: '(pre_merge or post_merge) and trtllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# GPU test stages and the ACR copy are disabled for this variant.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
# ============================================================================
# RELEASE-SPECIFIC BUILDS
# ============================================================================
......@@ -238,7 +292,7 @@ jobs:
name: Clean K8s builder if exists
runs-on: prod-default-small-v2
if: always()
needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline]
needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline]
steps:
- uses: actions/checkout@v4
- name: Create K8s builders (skip bootstrap)
......@@ -406,7 +460,7 @@ jobs:
release-publish:
name: Tag RC & Publish to NGC
needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator-build, frontend-build]
needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline, operator-build, frontend-build]
if: |
always() && !cancelled() &&
needs.prepare-release.result == 'success' &&
......@@ -590,6 +644,20 @@ jobs:
"${NGC_NAME}:${NGC_VERSION_TAG}-cuda13"
done
# ---- EFA runtime images (amd64 only, no multi-arch manifest needed) ----
echo ""
echo "=== EFA Runtime Images ==="
# vllm EFA (CUDA 12, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-vllm-efa-cuda12-amd64"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/vllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "vllm-runtime:${NGC_VERSION_TAG}-efa"
# trtllm EFA (CUDA 13, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-trtllm-efa-cuda13-amd64"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
# ---- Frontend images ----
echo ""
echo "=== Frontend Images ==="
......@@ -713,6 +781,10 @@ jobs:
echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "EFA runtime images (amd64 only):" >> $GITHUB_STEP_SUMMARY
echo "- \`vllm-runtime:${NGC_VERSION_TAG}-efa\`" >> $GITHUB_STEP_SUMMARY
echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-efa\`" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "Operator image:" >> $GITHUB_STEP_SUMMARY
echo "- \`kubernetes-operator:${NGC_VERSION_TAG}\`" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment