Unverified commit 97ef59dc, authored by Ran Rubin and committed by GitHub
Browse files

ci: Enable build on demand of container images (#6935)


Signed-off-by: Ran Rubin <ranrubin@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
parent 84de4e8b
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Manually dispatched workflow. Every input is an opt-in switch for one image;
# all of them default to false, so a dispatch with no boxes ticked builds
# nothing.
name: Build On Demand

on:
  workflow_dispatch:
    inputs:
      # Framework runtime images (each gates the matching *-pipeline job).
      build_vllm:
        description: 'Build and push vllm image, tagged with branch name'
        type: boolean
        default: false
      build_sglang:
        description: 'Build and push sglang image, tagged with branch name'
        type: boolean
        default: false
      build_trtllm:
        description: 'Build and push trtllm image, tagged with branch name'
        type: boolean
        default: false
      # Operator image (gates the operator job).
      build_operator:
        description: 'Build and push operator image, tagged with branch name'
        type: boolean
        default: false

env:
  # Builder name derived from the run id and the re-run attempt number.
  BUILDER_NAME: b-${{ github.run_id }}-${{ github.run_attempt }}

jobs:
# Publishes two job outputs consumed by the other jobs through
# `needs.init.outputs.*`:
#   builder_name       - the workflow-level BUILDER_NAME value
#   sanitized_ref_name - the dispatching ref name reduced to characters that
#                        are legal in a container image tag
init:
  runs-on: ubuntu-slim
  outputs:
    builder_name: ${{ steps.export-builder-name.outputs.builder_name }}
    sanitized_ref_name: ${{ steps.sanitize-ref.outputs.sanitized_ref_name }}
  steps:
    - name: Export builder name
      id: export-builder-name
      run: |
        # BUILDER_NAME is defined in the workflow-level `env`, so it is already
        # present in this step's environment.
        echo "builder_name=${BUILDER_NAME}" >> "$GITHUB_OUTPUT"
    - name: Sanitize ref name for container tags
      id: sanitize-ref
      shell: bash
      env:
        # Ref names are user-controlled. Passing the value as an environment
        # variable keeps it data; expanding it into the script text would let
        # a crafted branch name run arbitrary shell commands.
        REF_NAME: ${{ github.ref_name }}
        # A tag may hold at most 128 characters and consumers append suffixes
        # such as "-operator" or "-<framework>-cuda<N>-<platform>", so the
        # prefix is capped below the hard limit to leave room for them.
        MAX_PREFIX_LENGTH: '100'
      run: |
        # 1. map every character outside [a-zA-Z0-9._-] to '-'
        # 2. drop leading '.' / '-' (a tag may not start with either)
        # 3. drop one trailing '-'
        # 4. truncate to the prefix budget
        sanitized=$(printf '%s\n' "${REF_NAME}" \
          | sed -E 's/[^a-zA-Z0-9._-]/-/g; s/^[.-]+//; s/-$//' \
          | cut -c1-"${MAX_PREFIX_LENGTH}")
        # An empty prefix would produce tags that start with '-', which the
        # registry rejects much later in the build; fail here instead.
        if [ -z "${sanitized}" ]; then
          echo "::error::Ref name '${REF_NAME}' yields an empty container tag prefix"
          exit 1
        fi
        echo "sanitized_ref_name=${sanitized}" >> "$GITHUB_OUTPUT"
# ============================================================================
# FRAMEWORK PIPELINES (build-only)
# ============================================================================
# vllm runtime images: delegates to the shared flavor-matrix workflow with
# build_only enabled. Runs only when the `build_vllm` input is ticked.
vllm-pipeline:
  needs: [init]
  if: ${{ inputs.build_vllm }}
  uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
  secrets: inherit
  with:
    # What to build. The two list inputs are JSON arrays passed as strings.
    framework: vllm
    target: runtime
    platforms: '["amd64", "arm64"]'
    cuda_versions: '["12.9", "13.0"]'
    # How to build.
    builder_name: ${{ needs.init.outputs.builder_name }}
    build_timeout_minutes: 60
    # Build-only mode plus the branch-derived tag prefix from the init job.
    build_only: true
    sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }}
# sglang runtime images: delegates to the shared flavor-matrix workflow with
# build_only enabled. Runs only when the `build_sglang` input is ticked.
sglang-pipeline:
  needs: [init]
  if: ${{ inputs.build_sglang }}
  uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
  secrets: inherit
  with:
    # What to build. The two list inputs are JSON arrays passed as strings.
    framework: sglang
    target: runtime
    platforms: '["amd64", "arm64"]'
    cuda_versions: '["12.9", "13.0"]'
    # How to build.
    builder_name: ${{ needs.init.outputs.builder_name }}
    build_timeout_minutes: 60
    # Build-only mode plus the branch-derived tag prefix from the init job.
    build_only: true
    sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }}
# trtllm runtime images: delegates to the shared flavor-matrix workflow with
# build_only enabled. Runs only when the `build_trtllm` input is ticked.
# Unlike the vllm and sglang jobs, this one lists a single CUDA version.
trtllm-pipeline:
  needs: [init]
  if: ${{ inputs.build_trtllm }}
  uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
  secrets: inherit
  with:
    # What to build. The two list inputs are JSON arrays passed as strings.
    framework: trtllm
    target: runtime
    platforms: '["amd64", "arm64"]'
    cuda_versions: '["13.1"]'
    # How to build.
    builder_name: ${{ needs.init.outputs.builder_name }}
    build_timeout_minutes: 60
    # Build-only mode plus the branch-derived tag prefix from the init job.
    build_only: true
    sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }}
# ============================================================================
# OPERATOR (build-only)
# ============================================================================
# Builds the operator image from ./deploy/operator for linux/amd64 and
# linux/arm64 and pushes it to both ECR and ACR under two tags:
#   <commit sha>-operator  and  <sanitized ref name>-operator.
# Runs only when the `build_operator` input is ticked.
operator:
  needs: [init]
  if: inputs.build_operator
  name: Operator
  runs-on: prod-default-v2
  env:
    IMAGE_REGISTRY: ai-dynamo
    IMAGE_REPOSITORY: dynamo
    ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
  steps:
    - name: Checkout code
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
    - name: Initialize Dynamo Builder
      uses: ./.github/actions/init-dynamo-builder
      with:
        builder_name: ${{ needs.init.outputs.builder_name }}
        flavor: general
        all_arch: 'true'
    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
        azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
        azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
        azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
    - name: Build and push Container
      shell: bash
      working-directory: ./deploy/operator
      env:
        # Expression values reach the script as environment variables rather
        # than being expanded into the script text.
        ACR_HOSTNAME: ${{ secrets.AZURE_ACR_HOSTNAME }}
        COMMIT_SHA: ${{ github.sha }}
        SANITIZED_REF_NAME: ${{ needs.init.outputs.sanitized_ref_name }}
      run: |
        ECR_DEFAULT_IMAGE_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
        ACR_IMAGE_BASE="${ACR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
        DEFAULT_TAG="${COMMIT_SHA}-operator"
        BRANCH_TAG="${SANITIZED_REF_NAME}-operator"
        IMAGE_URIS=(
          "${ECR_DEFAULT_IMAGE_BASE}:${DEFAULT_TAG}"
          "${ACR_IMAGE_BASE}:${DEFAULT_TAG}"
          "${ECR_DEFAULT_IMAGE_BASE}:${BRANCH_TAG}"
          "${ACR_IMAGE_BASE}:${BRANCH_TAG}"
        )
        # One "-t <uri>" pair per image. Collecting them in an array keeps
        # every URI a single argument without relying on word-splitting.
        TAGGING_FLAGS=()
        for image_uri in "${IMAGE_URIS[@]}"; do
          TAGGING_FLAGS+=(-t "${image_uri}")
        done
        echo "flags for docker buildx: ${TAGGING_FLAGS[*]}"
        docker buildx build --push \
          --platform linux/amd64,linux/arm64 \
          --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" \
          "${TAGGING_FLAGS[@]}" -f Dockerfile .
        # Job summary: a one-column markdown table listing every pushed URI.
        {
          echo "### Operator Container Images"
          echo ""
          echo "| Image URI |"
          echo "|-----|"
          for image_uri in "${IMAGE_URIS[@]}"; do
            echo "| \`${image_uri}\` |"
          done
        } >> "$GITHUB_STEP_SUMMARY"
# ============================================================================
# CLEANUP
# ============================================================================
# Removes the buildx builder used by this run. `if: always()` makes the job
# run even when an upstream job failed or was skipped, so the builder name
# must not depend solely on the init job having succeeded: each use falls
# back to the workflow-level BUILDER_NAME, which is the value init exports.
clean-k8s-builder:
  name: Clean K8s builder if exists
  runs-on: prod-default-small-v2
  if: always()
  needs: [init, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator]
  steps:
    - name: Checkout repository
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
    - name: Create K8s builders (skip bootstrap)
      uses: ./.github/actions/bootstrap-buildkit
      # Best effort: a failure here must not prevent the removal step below.
      continue-on-error: true
      with:
        builder_name: ${{ needs.init.outputs.builder_name || env.BUILDER_NAME }}
        buildkit_worker_addresses: ''
        skip_bootstrap: true
    - name: Builder Cleanup in case of k8s builder
      shell: bash
      env:
        TARGET_BUILDER: ${{ needs.init.outputs.builder_name || env.BUILDER_NAME }}
      run: |
        # Quoted so that an unexpectedly empty value is passed as an empty
        # argument instead of vanishing and leaving `docker buildx rm` without
        # a builder name. `|| true` keeps the job green when the builder does
        # not exist.
        docker buildx rm "${TARGET_BUILDER}" || true
...@@ -108,6 +108,21 @@ on: ...@@ -108,6 +108,21 @@ on:
required: false required: false
type: boolean type: boolean
default: false default: false
show_summary:
description: 'Show summary'
required: false
type: boolean
default: false
build_only:
description: 'Build and push only skip all tests, show summary'
required: false
type: boolean
default: false
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images (used with build_only)'
required: false
type: string
default: ''
secrets: secrets:
AWS_DEFAULT_REGION: AWS_DEFAULT_REGION:
required: true required: true
...@@ -119,8 +134,6 @@ on: ...@@ -119,8 +134,6 @@ on:
required: true required: true
AZURE_ACR_PASSWORD: AZURE_ACR_PASSWORD:
required: true required: true
CI_TOKEN:
required: false
SCCACHE_S3_BUCKET: SCCACHE_S3_BUCKET:
required: false required: false
AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID:
...@@ -147,7 +160,7 @@ jobs: ...@@ -147,7 +160,7 @@ jobs:
extra_tags: ${{ inputs.extra_tags }} extra_tags: ${{ inputs.extra_tags }}
no_cache: ${{ inputs.no_cache }} no_cache: ${{ inputs.no_cache }}
builder_name: ${{ inputs.builder_name }} builder_name: ${{ inputs.builder_name }}
build_image: ${{ inputs.build_image }} build_image: ${{ inputs.build_only || inputs.build_image }}
build_timeout_minutes: ${{ inputs.build_timeout_minutes }} build_timeout_minutes: ${{ inputs.build_timeout_minutes }}
push_image: ${{ inputs.push_image }} push_image: ${{ inputs.push_image }}
run_cpu_only_tests: ${{ inputs.run_cpu_only_tests }} run_cpu_only_tests: ${{ inputs.run_cpu_only_tests }}
...@@ -162,4 +175,7 @@ jobs: ...@@ -162,4 +175,7 @@ jobs:
copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }} copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }}
make_efa: ${{ inputs.make_efa }} make_efa: ${{ inputs.make_efa }}
show_summary: ${{ inputs.build_only || inputs.show_summary }}
build_only: ${{ inputs.build_only }}
sanitized_ref_name: ${{ inputs.sanitized_ref_name }}
secrets: inherit secrets: inherit
...@@ -118,6 +118,16 @@ on: ...@@ -118,6 +118,16 @@ on:
required: false required: false
type: boolean type: boolean
default: false default: false
build_only:
description: 'Build and push only skip all tests, show summary'
required: false
type: boolean
default: false
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images (used with build_only)'
required: false
type: string
default: ''
secrets: secrets:
AWS_DEFAULT_REGION: AWS_DEFAULT_REGION:
required: true required: true
...@@ -129,8 +139,6 @@ on: ...@@ -129,8 +139,6 @@ on:
required: true required: true
AZURE_ACR_PASSWORD: AZURE_ACR_PASSWORD:
required: true required: true
CI_TOKEN:
required: false
SCCACHE_S3_BUCKET: SCCACHE_S3_BUCKET:
required: false required: false
AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID:
...@@ -161,27 +169,6 @@ jobs: ...@@ -161,27 +169,6 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with: with:
lfs: true lfs: true
- name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
id: extra-tags
shell: bash
env:
EXTRA_TAGS: ${{ inputs.extra_tags }}
CUDA_VERSION: ${{ inputs.cuda_version }}
run: |
CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}
if [ -n "$EXTRA_TAGS" ]; then
RESULT=""
while IFS= read -r tag; do
if [ -n "$tag" ]; then
RESULT+="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${tag}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
fi
done <<< "$EXTRA_TAGS"
echo "tags<<EOF" >> $GITHUB_OUTPUT
echo "$RESULT" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
else
echo "tags=" >> $GITHUB_OUTPUT
fi
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
...@@ -212,6 +199,37 @@ jobs: ...@@ -212,6 +199,37 @@ jobs:
flavor: ${{ inputs.framework }} flavor: ${{ inputs.framework }}
arch: ${{ inputs.platform }} arch: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
# Emits the step output `tags`: a newline-separated list of additional image
# URIs, each suffixed with "-cuda<major>-<platform>".
#   * one ECR URI per non-empty line of the `extra_tags` input
#   * in build_only mode, a branch-named URI on ECR and on ACR, plus the
#     calculated target tag on ACR
# The output is an empty string when nothing applies.
- name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
  id: extra-tags
  shell: bash
  env:
    # Every expression value is handed over as an environment variable so it
    # is treated as data instead of being expanded into the script text.
    EXTRA_TAGS: ${{ inputs.extra_tags }}
    CUDA_VERSION: ${{ inputs.cuda_version }}
    PLATFORM: ${{ inputs.platform }}
    FRAMEWORK: ${{ inputs.framework }}
    BUILD_ONLY: ${{ inputs.build_only }}
    SANITIZED_REF_NAME: ${{ inputs.sanitized_ref_name }}
    TARGET_TAG_PLAIN: ${{ steps.calculate-target-tag.outputs.target_tag_plain }}
    ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
    ACR_REGISTRY: ${{ secrets.AZURE_ACR_HOSTNAME }}
  run: |
    # "12.9" -> "12": only the major CUDA version goes into the tag.
    CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}
    TAG_SUFFIX="cuda${CUDA_VERSION_MAJOR}-${PLATFORM}"
    RESULT=""
    if [ -n "$EXTRA_TAGS" ]; then
      while IFS= read -r tag; do
        if [ -n "$tag" ]; then
          RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}-${TAG_SUFFIX}"$'\n'
        fi
      done <<< "$EXTRA_TAGS"
    fi
    if [ "$BUILD_ONLY" == "true" ]; then
      # sanitized_ref_name defaults to ''. Without a prefix the branch tag
      # would begin with '-', which is not a valid image tag, so the branch
      # tags are skipped in that case rather than failing the push.
      if [ -n "$SANITIZED_REF_NAME" ]; then
        BRANCH_TAG="${SANITIZED_REF_NAME}-${FRAMEWORK}"
        RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${BRANCH_TAG}-${TAG_SUFFIX}"$'\n'
        RESULT+="${ACR_REGISTRY}/ai-dynamo/dynamo:${BRANCH_TAG}-${TAG_SUFFIX}"$'\n'
      else
        echo "::warning::build_only is set but sanitized_ref_name is empty; skipping branch tags"
      fi
      RESULT+="${ACR_REGISTRY}/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-${TAG_SUFFIX}"$'\n'
    fi
    if [ -n "$RESULT" ]; then
      # Multi-line output values use the heredoc-style delimiter syntax.
      echo "tags<<EOF" >> "$GITHUB_OUTPUT"
      echo "$RESULT" >> "$GITHUB_OUTPUT"
      echo "EOF" >> "$GITHUB_OUTPUT"
    else
      echo "tags=" >> "$GITHUB_OUTPUT"
    fi
- name: Print Build Container inputs - name: Print Build Container inputs
run: | run: |
echo "=== Build Container Inputs ===" echo "=== Build Container Inputs ==="
...@@ -250,7 +268,6 @@ jobs: ...@@ -250,7 +268,6 @@ jobs:
target: ${{ inputs.target }} target: ${{ inputs.target }}
platform: ${{ inputs.platform }} platform: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
...@@ -269,13 +286,29 @@ jobs: ...@@ -269,13 +286,29 @@ jobs:
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
echo "|-----|" >> $GITHUB_STEP_SUMMARY echo "|-----|" >> $GITHUB_STEP_SUMMARY
echo "| \`${{ steps.calculate-target-tag.outputs.default_target_image_uri }}\` |" >> $GITHUB_STEP_SUMMARY echo "| \`${{ steps.calculate-target-tag.outputs.default_target_image_uri }}\` |" >> $GITHUB_STEP_SUMMARY
EXTRA_TAGS="${{ steps.extra-tags.outputs.tags }}"
if [ -n "$EXTRA_TAGS" ]; then
echo "" >> $GITHUB_STEP_SUMMARY
echo "### 🏷️ Extra Tags" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
echo "|-----|" >> $GITHUB_STEP_SUMMARY
while IFS= read -r tag; do
if [ -n "$tag" ]; then
echo "| \`${tag}\` |" >> $GITHUB_STEP_SUMMARY
fi
done <<< "$EXTRA_TAGS"
fi
# ============================================================================ # ============================================================================
# TEST # TEST
# ============================================================================ # ============================================================================
test: test:
if: ( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) && inputs.build_image if: |
!inputs.build_only &&
( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) &&
inputs.build_image
needs: [build] needs: [build]
name: Test ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: Test ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
runs-on: ${{ inputs.platform == 'amd64' && 'prod-tester-amd-gpu-v1' || 'prod-tester-arm-v1' }} runs-on: ${{ inputs.platform == 'amd64' && 'prod-tester-amd-gpu-v1' || 'prod-tester-arm-v1' }}
...@@ -372,6 +405,7 @@ jobs: ...@@ -372,6 +405,7 @@ jobs:
multi-gpu-test: multi-gpu-test:
# Multi-GPU support limited to AMD64 only # Multi-GPU support limited to AMD64 only
if: | if: |
!inputs.build_only &&
inputs.run_multi_gpu_tests && inputs.run_multi_gpu_tests &&
inputs.build_image && inputs.build_image &&
( inputs.platform != 'arm64' ) ( inputs.platform != 'arm64' )
...@@ -435,6 +469,7 @@ jobs: ...@@ -435,6 +469,7 @@ jobs:
# Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped) # Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped)
if: | if: |
always() && always() &&
!inputs.build_only &&
inputs.copy_to_acr && inputs.copy_to_acr &&
needs.build.result == 'success' && needs.build.result == 'success' &&
(needs.test.result == 'success' || needs.test.result == 'skipped') (needs.test.result == 'success' || needs.test.result == 'skipped')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment