Unverified commit a110abfb, authored by Anant Sharma, committed by GitHub
Browse files

ci: enable EFA builds in post-merge and release pipelines (#6650)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 57b55d88
...@@ -103,6 +103,11 @@ on: ...@@ -103,6 +103,11 @@ on:
required: false required: false
type: boolean type: boolean
default: true default: true
make_efa:
description: 'Enable AWS EFA support in the build'
required: false
type: boolean
default: false
secrets: secrets:
AWS_DEFAULT_REGION: AWS_DEFAULT_REGION:
required: true required: true
...@@ -156,4 +161,5 @@ jobs: ...@@ -156,4 +161,5 @@ jobs:
multi_gpu_test_timeout_minutes: ${{ inputs.multi_gpu_test_timeout_minutes }} multi_gpu_test_timeout_minutes: ${{ inputs.multi_gpu_test_timeout_minutes }}
copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }} copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }}
make_efa: ${{ inputs.make_efa }}
secrets: inherit secrets: inherit
...@@ -113,6 +113,11 @@ on: ...@@ -113,6 +113,11 @@ on:
required: false required: false
type: boolean type: boolean
default: false default: false
make_efa:
description: 'Enable AWS EFA support in the build'
required: false
type: boolean
default: false
secrets: secrets:
AWS_DEFAULT_REGION: AWS_DEFAULT_REGION:
required: true required: true
...@@ -191,7 +196,11 @@ jobs: ...@@ -191,7 +196,11 @@ jobs:
run: | run: |
CUDA_VERSION_RAW=${{ inputs.cuda_version }} CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*} CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}" EFA_SUFFIX=""
if [ "${{ inputs.make_efa }}" == "true" ]; then
EFA_SUFFIX="-efa"
fi
TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}${EFA_SUFFIX}"
DEFAULT_TARGET_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}" DEFAULT_TARGET_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}"
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
...@@ -219,11 +228,16 @@ jobs: ...@@ -219,11 +228,16 @@ jobs:
shell: bash shell: bash
run: | run: |
echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}" echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
MAKE_EFA_FLAG=""
if [ "${{ inputs.make_efa }}" == "true" ]; then
MAKE_EFA_FLAG="--make-efa"
fi
python ./container/render.py \ python ./container/render.py \
--target=${{ inputs.target }} \ --target=${{ inputs.target }} \
--framework=${{ inputs.framework }} \ --framework=${{ inputs.framework }} \
--platform=${{ inputs.platform }} \ --platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \ --cuda-version=${{ inputs.cuda_version }} \
${MAKE_EFA_FLAG} \
--show-result \ --show-result \
--output-short-filename --output-short-filename
- name: Build Container - name: Build Container
......
...@@ -84,12 +84,61 @@ jobs: ...@@ -84,12 +84,61 @@ jobs:
multi_gpu_test_timeout_minutes: 60 multi_gpu_test_timeout_minutes: 60
secrets: inherit secrets: inherit
# ============================================================================
# EFA PIPELINES (Build only, amd64)
# ============================================================================
# ============================================================================
# VLLM EFA PIPELINE
# ============================================================================
# Builds the vllm runtime image with EFA enabled by calling the reusable
# flavor-matrix workflow. The matrix is restricted to amd64 and CUDA 12.9.
vllm-efa-pipeline:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: vllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["12.9"]'
# Boolean input forwarded to the reusable workflow to turn on EFA support.
make_efa: true
# Each tag expression evaluates to an empty string unless the ref is `main`;
# on `main` they yield `main-vllm-efa` and `main-vllm-efa-<commit sha>`.
extra_tags: |
${{ github.ref_name == 'main' && 'main-vllm-efa' || '' }}
${{ github.ref_name == 'main' && format('main-vllm-efa-{0}', github.sha) || '' }}
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
# 120-minute build budget on `main`, 60 minutes on any other ref.
build_timeout_minutes: ${{ github.ref_name == 'main' && 120 || 60 }}
# NOTE(review): the section header calls this pipeline "Build only", yet
# CPU-only test markers are still passed - confirm whether the called
# workflow runs CPU-only tests for this flavor.
cpu_only_test_markers: '(pre_merge or post_merge) and vllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# Single-GPU tests, multi-GPU tests and the ACR copy are all switched off.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
# ============================================================================
# TRTLLM EFA PIPELINE
# ============================================================================
# Builds the trtllm runtime image with EFA enabled by calling the reusable
# flavor-matrix workflow. The matrix is restricted to amd64 and CUDA 13.1.
trtllm-efa-pipeline:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: trtllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["13.1"]'
# Boolean input forwarded to the reusable workflow to turn on EFA support.
make_efa: true
# Each tag expression evaluates to an empty string unless the ref is `main`;
# on `main` they yield `main-trtllm-efa` and `main-trtllm-efa-<commit sha>`.
extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm-efa' || '' }}
${{ github.ref_name == 'main' && format('main-trtllm-efa-{0}', github.sha) || '' }}
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
# 120-minute build budget on `main`, 60 minutes on any other ref.
build_timeout_minutes: ${{ github.ref_name == 'main' && 120 || 60 }}
# NOTE(review): the section header calls this pipeline "Build only", yet
# CPU-only test markers are still passed - confirm whether the called
# workflow runs CPU-only tests for this flavor.
cpu_only_test_markers: '(pre_merge or post_merge) and trtllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# Single-GPU tests, multi-GPU tests and the ACR copy are all switched off.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
############################## SLACK NOTIFICATION ############################## ############################## SLACK NOTIFICATION ##############################
notify-slack: notify-slack:
name: Notify Slack name: Notify Slack
runs-on: prod-builder-amd-v1 runs-on: prod-builder-amd-v1
if: always() && failure() if: always() && failure()
needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline ] needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline ]
permissions: permissions:
contents: read contents: read
steps: steps:
......
...@@ -157,6 +157,60 @@ jobs: ...@@ -157,6 +157,60 @@ jobs:
multi_gpu_test_timeout_minutes: 60 multi_gpu_test_timeout_minutes: 60
secrets: inherit secrets: inherit
# ============================================================================
# EFA PIPELINES (Build only, amd64)
# ============================================================================
# Release build of the vllm runtime image with EFA enabled, delegated to the
# reusable flavor-matrix workflow (amd64, CUDA 12.9 only).
vllm-efa-pipeline:
name: vllm EFA builds
needs: [prepare-release, manual-approval]
# Gate: `always()` replaces the implicit success() check on `needs`, so the
# explicit result checks decide. The job runs when prepare-release succeeded
# and either the trigger is a push or manual-approval succeeded.
if: |
always() &&
needs.prepare-release.result == 'success' &&
(github.event_name == 'push' || needs.manual-approval.result == 'success')
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: vllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["12.9"]'
# Boolean input forwarded to the reusable workflow to turn on EFA support.
make_efa: true
# Extra tag is the prepare-release `image_prefix` output plus `-vllm-efa`.
extra_tags: |
${{ needs.prepare-release.outputs.image_prefix }}-vllm-efa
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 120
# NOTE(review): the section header calls this pipeline "Build only", yet
# CPU-only test markers are still passed - confirm whether the called
# workflow runs CPU-only tests for this flavor.
cpu_only_test_markers: '(pre_merge or post_merge) and vllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# Single-GPU tests, multi-GPU tests and the ACR copy are all switched off.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
# Release build of the trtllm runtime image with EFA enabled, delegated to the
# reusable flavor-matrix workflow (amd64, CUDA 13.1 only).
trtllm-efa-pipeline:
name: trtllm EFA builds
needs: [prepare-release, manual-approval]
# Gate: `always()` replaces the implicit success() check on `needs`, so the
# explicit result checks decide. The job runs when prepare-release succeeded
# and either the trigger is a push or manual-approval succeeded.
if: |
always() &&
needs.prepare-release.result == 'success' &&
(github.event_name == 'push' || needs.manual-approval.result == 'success')
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: trtllm
target: runtime
platforms: '["amd64"]'
cuda_versions: '["13.1"]'
# Boolean input forwarded to the reusable workflow to turn on EFA support.
make_efa: true
# Extra tag is the prepare-release `image_prefix` output plus `-trtllm-efa`.
extra_tags: |
${{ needs.prepare-release.outputs.image_prefix }}-trtllm-efa
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 120
# NOTE(review): the section header calls this pipeline "Build only", yet
# CPU-only test markers are still passed - confirm whether the called
# workflow runs CPU-only tests for this flavor.
cpu_only_test_markers: '(pre_merge or post_merge) and trtllm and gpu_0'
cpu_only_test_timeout_minutes: 60
# Single-GPU tests, multi-GPU tests and the ACR copy are all switched off.
run_single_gpu_tests: false
run_multi_gpu_tests: false
copy_to_acr: false
secrets: inherit
# ============================================================================ # ============================================================================
# RELEASE-SPECIFIC BUILDS # RELEASE-SPECIFIC BUILDS
# ============================================================================ # ============================================================================
...@@ -238,7 +292,7 @@ jobs: ...@@ -238,7 +292,7 @@ jobs:
name: Clean K8s builder if exists name: Clean K8s builder if exists
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
if: always() if: always()
needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline] needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Create K8s builders (skip bootstrap) - name: Create K8s builders (skip bootstrap)
...@@ -406,7 +460,7 @@ jobs: ...@@ -406,7 +460,7 @@ jobs:
release-publish: release-publish:
name: Tag RC & Publish to NGC name: Tag RC & Publish to NGC
needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator-build, frontend-build] needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline, operator-build, frontend-build]
if: | if: |
always() && !cancelled() && always() && !cancelled() &&
needs.prepare-release.result == 'success' && needs.prepare-release.result == 'success' &&
...@@ -590,6 +644,20 @@ jobs: ...@@ -590,6 +644,20 @@ jobs:
"${NGC_NAME}:${NGC_VERSION_TAG}-cuda13" "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13"
done done
# ---- EFA runtime images (amd64 only, no multi-arch manifest needed) ----
echo ""
echo "=== EFA Runtime Images ==="
# vllm EFA (CUDA 12, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-vllm-efa-cuda12-amd64"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/vllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "vllm-runtime:${NGC_VERSION_TAG}-efa"
# trtllm EFA (CUDA 13, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-trtllm-efa-cuda13-amd64"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
# ---- Frontend images ---- # ---- Frontend images ----
echo "" echo ""
echo "=== Frontend Images ===" echo "=== Frontend Images ==="
...@@ -713,6 +781,10 @@ jobs: ...@@ -713,6 +781,10 @@ jobs:
echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY
echo "EFA runtime images (amd64 only):" >> $GITHUB_STEP_SUMMARY
echo "- \`vllm-runtime:${NGC_VERSION_TAG}-efa\`" >> $GITHUB_STEP_SUMMARY
echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-efa\`" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "Operator image:" >> $GITHUB_STEP_SUMMARY echo "Operator image:" >> $GITHUB_STEP_SUMMARY
echo "- \`kubernetes-operator:${NGC_VERSION_TAG}\`" >> $GITHUB_STEP_SUMMARY echo "- \`kubernetes-operator:${NGC_VERSION_TAG}\`" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment