Unverified commit 07c7cc87, authored by Dillon Cullinan, committed by GitHub
Browse files

ci: OPS-4223: Add snapshot build to CI (#8159)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
parent d187a14c
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Composite action: lint, test, and build/push the container image for one
# component under ./deploy/<component>.
#
# Flow: pick platforms -> create buildx builder -> registry login -> build the
# `linter` and `tester` Dockerfile targets -> (operator only) run `make check`
# -> build and push the final image under every requested tag.
#
# Caller-supplied inputs are handed to shell steps through `env:` rather than
# being interpolated into the script text with `${{ }}`, so their contents can
# never be parsed as shell syntax.
name: 'Build Deploy Component'
description: 'Lint, test, and build/push a deploy component container (operator or snapshot)'

inputs:
  component:
    description: 'Component to build: operator or snapshot'
    required: true
  image_tag:
    description: 'Image tag to apply to built images'
    required: true
  builder_name:
    description: 'Buildx builder name'
    required: true
  aws_default_region:
    description: 'AWS Default Region'
    required: true
  aws_account_id:
    description: 'AWS Account ID'
    required: true
  azure_acr_hostname:
    description: 'Optional Azure ACR hostname for docker login'
    required: false
    default: ''
  azure_acr_user:
    description: 'Optional Azure ACR username for docker login'
    required: false
    default: ''
  azure_acr_password:
    description: 'Optional Azure ACR password for docker login'
    required: false
    default: ''
  ngc_ci_access_token:
    description: 'Optional NGC CI access token for nvcr.io login'
    required: false
    default: ''
  extra_tags:
    description: 'Optional newline-separated list of fully-qualified extra image URIs to also tag and push'
    required: false
    default: ''

outputs:
  image_tag:
    description: 'The image tag applied to built images'
    value: ${{ inputs.image_tag }}

runs:
  using: composite
  steps:
    # The operator image is pushed for amd64 and arm64 and is linted/tested on
    # arm64; every other component is built, linted and tested on amd64 only.
    - name: Determine build settings
      id: settings
      shell: bash
      env:
        COMPONENT: ${{ inputs.component }}
      run: |
        if [[ "${COMPONENT}" == "operator" ]]; then
          echo "lint_platform=linux/arm64" >> "$GITHUB_OUTPUT"
          echo "build_platforms=linux/amd64,linux/arm64" >> "$GITHUB_OUTPUT"
        else
          echo "lint_platform=linux/amd64" >> "$GITHUB_OUTPUT"
          echo "build_platforms=linux/amd64" >> "$GITHUB_OUTPUT"
        fi

    - name: Initialize Dynamo Builder
      uses: ./.github/actions/init-dynamo-builder
      with:
        builder_name: ${{ inputs.builder_name }}
        flavor: general
        arch: ${{ steps.settings.outputs.build_platforms }}

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ inputs.aws_default_region }}
        aws_account_id: ${{ inputs.aws_account_id }}
        azure_acr_hostname: ${{ inputs.azure_acr_hostname }}
        azure_acr_user: ${{ inputs.azure_acr_user }}
        azure_acr_password: ${{ inputs.azure_acr_password }}
        ngc_ci_access_token: ${{ inputs.ngc_ci_access_token }}

    # Builds the Dockerfile's `linter` target; nothing is pushed.
    - name: Linter
      shell: bash
      working-directory: ./deploy/${{ inputs.component }}
      env:
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
        LINT_PLATFORM: ${{ steps.settings.outputs.lint_platform }}
      run: |
        docker buildx build \
          --platform "${LINT_PLATFORM}" \
          --target linter \
          --progress=plain \
          --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" \
          --build-context snapshot=../snapshot \
          .

    # Builds the Dockerfile's `tester` target; nothing is pushed.
    - name: Tester
      shell: bash
      working-directory: ./deploy/${{ inputs.component }}
      env:
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
        LINT_PLATFORM: ${{ steps.settings.outputs.lint_platform }}
      run: |
        docker buildx build \
          --platform "${LINT_PLATFORM}" \
          --target tester \
          --progress=plain \
          --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" \
          --build-context snapshot=../snapshot \
          .

    # The next four steps run for the operator only and end in `make check`.
    - name: Set up Go
      if: ${{ inputs.component == 'operator' }}
      uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
      with:
        go-version: '1.25'

    # NOTE(review): this is the only third-party action here referenced by a
    # mutable tag instead of a commit SHA; consider pinning it like setup-go.
    - name: Set up Python
      if: ${{ inputs.component == 'operator' }}
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install Python dependencies for operator codegen
      if: ${{ inputs.component == 'operator' }}
      shell: bash
      working-directory: ./deploy/operator
      run: |
        python -m pip install --upgrade pip
        python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"

    - name: Check for uncommitted changes
      if: ${{ inputs.component == 'operator' }}
      shell: bash
      working-directory: ./deploy/operator
      run: |
        make check

    # Pushes <ecr>/ai-dynamo/dynamo:<image_tag> plus every URI in `extra_tags`,
    # then lists all pushed URIs in the job summary.
    - name: Build and push Container
      shell: bash
      working-directory: ./deploy/${{ inputs.component }}
      env:
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
        IMAGE_REGISTRY: ai-dynamo
        IMAGE_REPOSITORY: dynamo
        COMPONENT: ${{ inputs.component }}
        IMAGE_TAG: ${{ inputs.image_tag }}
        EXTRA_TAGS: ${{ inputs.extra_tags }}
        BUILD_PLATFORMS: ${{ steps.settings.outputs.build_platforms }}
      run: |
        ECR_DEFAULT_IMAGE_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
        IMAGE_URIS=("${ECR_DEFAULT_IMAGE_BASE}:${IMAGE_TAG}")

        # Append optional full extra image URIs provided by caller.
        while IFS= read -r extra_image; do
          # Image references never contain whitespace, so stripping it only
          # removes indentation / CR residue and turns blank lines into "".
          extra_image="${extra_image//[[:space:]]/}"
          if [[ -n "${extra_image}" ]]; then
            IMAGE_URIS+=("${extra_image}")
          fi
        done <<< "${EXTRA_TAGS}"

        # Arrays keep every flag and value a separate word without relying on
        # unquoted word splitting.
        TAG_ARGS=()
        for image_uri in "${IMAGE_URIS[@]}"; do
          TAG_ARGS+=(-t "${image_uri}")
        done
        echo "flags for docker buildx: ${TAG_ARGS[*]}"

        BUILD_ARGS=(
          --push
          --platform "${BUILD_PLATFORMS}"
          --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/"
          --build-context snapshot=../snapshot
        )
        # The snapshot image is built from the Dockerfile's `agent` target;
        # other components use the Dockerfile's default (final) stage.
        if [[ "${COMPONENT}" == "snapshot" ]]; then
          BUILD_ARGS+=(--target agent)
        fi

        docker buildx build "${BUILD_ARGS[@]}" "${TAG_ARGS[@]}" -f Dockerfile .

        DISPLAY_NAME="${COMPONENT}"
        {
          # ${DISPLAY_NAME^} upper-cases the first letter for the heading.
          echo "### 🐳 ${DISPLAY_NAME^} Container Images"
          echo ""
          echo "| Image URI |"
          echo "|-----|"
          for image_uri in "${IMAGE_URIS[@]}"; do
            echo "| \`${image_uri}\` |"
          done
        } >> "$GITHUB_STEP_SUMMARY"
......@@ -207,100 +207,49 @@ jobs:
operator:
name: Operator
runs-on: prod-default-v2
env:
IMAGE_REGISTRY: ai-dynamo
IMAGE_REPOSITORY: dynamo
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
outputs:
operator_default_tag: ${{ steps.build-and-push-image.outputs.operator_default_tag }}
operator_default_tag: ${{ steps.build.outputs.image_tag }}
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Initialize Dynamo Builder
uses: ./.github/actions/init-dynamo-builder
with:
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
flavor: general
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
- name: Build and push operator
id: build
uses: ./.github/actions/build-deploy-component
with:
component: operator
image_tag: ${{ github.sha }}-operator
builder_name: ${{ needs.changed-files.outputs.builder_name }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Linter
shell: bash
working-directory: ./deploy/operator
run: |
docker buildx build --platform linux/arm64 --target linter --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot .
- name: Tester
shell: bash
working-directory: ./deploy/operator
run: |
docker buildx build --platform linux/arm64 --target tester --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot .
- name: Set up Go
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
with:
go-version: '1.25'
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Python dependencies for operator codegen
shell: bash
working-directory: ./deploy/operator
run: |
python -m pip install --upgrade pip
python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"
- name: Check for uncommitted changes
shell: bash
working-directory: ./deploy/operator
run: |
make check
- name: Build and push Container
id: build-and-push-image
shell: bash
working-directory: ./deploy/operator
env:
NO_CACHE_FLAG: '' # placeholder for future logic to add no cache flag if needed
run: |
ECR_DEFAULT_IMAGE_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
DEFAULT_TAG="${{ github.sha }}-operator"
ACR_IMAGE_BASE="${{ secrets.AZURE_ACR_HOSTNAME }}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
IMAGE_URIS=(
"${ECR_DEFAULT_IMAGE_BASE}:${DEFAULT_TAG}"
"${ACR_IMAGE_BASE}:${DEFAULT_TAG}"
)
if [[ "${{ github.ref_name }}" == "main" ]]; then
IMAGE_URIS+=(
"${ECR_DEFAULT_IMAGE_BASE}:main-operator"
"${ACR_IMAGE_BASE}:main-operator"
)
fi
extra_tags: |
${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-operator
echo "operator_default_tag=${DEFAULT_TAG}" >> $GITHUB_OUTPUT
TAGGING_FLAGS=$(printf -- '-t %s ' "${IMAGE_URIS[@]}")
echo "flags for docker buildx: ${TAGGING_FLAGS}"
# ============================================================================
# Snapshot Agent
# ============================================================================
if [[ "$NO_CACHE_FLAG" == "true" ]]; then
NO_CACHE_FLAG="--no-cache"
fi
docker buildx build --push ${NO_CACHE_FLAG} \
--platform linux/amd64,linux/arm64 \
--build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ \
--build-context snapshot=../snapshot \
${TAGGING_FLAGS} -f Dockerfile .
echo "### 🐳 Operator Container Images" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
echo "|-----|" >> $GITHUB_STEP_SUMMARY
for image_uri in "${IMAGE_URIS[@]}"; do
echo "| \`${image_uri}\` |" >> $GITHUB_STEP_SUMMARY
done
# Builds and pushes the snapshot agent image, tagged <sha>-snapshot-agent, to
# ECR and (via extra_tags) to the Azure container registry.
snapshot-agent:
  name: Snapshot Agent
  runs-on: prod-default-v2
  steps:
    - name: Checkout code
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
    - name: Build and push snapshot agent
      uses: ./.github/actions/build-deploy-component
      with:
        component: snapshot
        image_tag: ${{ github.sha }}-snapshot-agent
        # This job declares no `needs:`, so the `needs` context is empty here
        # and `needs.changed-files.outputs.builder_name` would expand to an
        # empty string. Use a name unique to this run attempt instead.
        builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
        azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
        azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
        azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
        extra_tags: |
          ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-snapshot-agent
# ============================================================================
# DEPLOYMENT JOBS
......
......@@ -51,6 +51,7 @@ jobs:
needs:
- changed-files
- operator
- snapshot-agent
- vllm-build
- vllm-dev-build
- vllm-test
......@@ -83,99 +84,60 @@ jobs:
run: |
echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped", "cancelled"] | any($result == .))'
# ============================================================================
# Operator
# ============================================================================
operator:
needs: changed-files
if: needs.changed-files.outputs.operator == 'true'
name: Operator
runs-on: prod-default-v2
env:
IMAGE_REGISTRY: ai-dynamo
IMAGE_REPOSITORY: dynamo
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
outputs:
operator_default_tag: ${{ steps.build-and-push-image.outputs.operator_default_tag }}
operator_default_tag: ${{ steps.build.outputs.image_tag }}
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Initialize Dynamo Builder
uses: ./.github/actions/init-dynamo-builder
- name: Build and push operator
id: build
uses: ./.github/actions/build-deploy-component
with:
component: operator
image_tag: ${{ github.sha }}-operator
builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
with:
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Linter
shell: bash
working-directory: ./deploy/operator
run: |
docker buildx build --platform linux/arm64 --target linter --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot .
- name: Tester
shell: bash
working-directory: ./deploy/operator
run: |
docker buildx build --platform linux/arm64 --target tester --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot .
- name: Set up Go
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
with:
go-version: '1.25'
- name: Set up Python
uses: actions/setup-python@v5
extra_tags: |
${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-operator
# ============================================================================
# Snapshot Agent
# ============================================================================
snapshot-agent:
needs: changed-files
if: needs.changed-files.outputs.deploy == 'true'
name: Snapshot Agent
runs-on: prod-default-v2
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Build and push snapshot agent
uses: ./.github/actions/build-deploy-component
with:
python-version: "3.11"
- name: Install Python dependencies for operator codegen
shell: bash
working-directory: ./deploy/operator
run: |
python -m pip install --upgrade pip
python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"
- name: Check for uncommitted changes
shell: bash
working-directory: ./deploy/operator
run: |
make check
- name: Build and push Container
id: build-and-push-image
shell: bash
working-directory: ./deploy/operator
env:
NO_CACHE_FLAG: '' # placeholder for future logic to add no cache flag if needed
run: |
ECR_DEFAULT_IMAGE_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
DEFAULT_TAG="${{ github.sha }}-operator"
ACR_IMAGE_BASE="${{ secrets.AZURE_ACR_HOSTNAME }}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
IMAGE_URIS=(
"${ECR_DEFAULT_IMAGE_BASE}:${DEFAULT_TAG}"
"${ACR_IMAGE_BASE}:${DEFAULT_TAG}"
)
echo "operator_default_tag=${DEFAULT_TAG}" >> $GITHUB_OUTPUT
TAGGING_FLAGS=$(printf -- '-t %s ' "${IMAGE_URIS[@]}")
echo "flags for docker buildx: ${TAGGING_FLAGS}"
if [[ "$NO_CACHE_FLAG" == "true" ]]; then
NO_CACHE_FLAG="--no-cache"
fi
docker buildx build --push ${NO_CACHE_FLAG} \
--platform linux/amd64,linux/arm64 \
--build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ \
--build-context snapshot=../snapshot \
${TAGGING_FLAGS} -f Dockerfile .
echo "### 🐳 Operator Container Images" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
echo "|-----|" >> $GITHUB_STEP_SUMMARY
for image_uri in "${IMAGE_URIS[@]}"; do
echo "| \`${image_uri}\` |" >> $GITHUB_STEP_SUMMARY
done
component: snapshot
image_tag: ${{ github.sha }}-snapshot-agent
builder_name: ${{ needs.changed-files.outputs.builder_name }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
# ============================================================================
# FRAMEWORK PIPELINES (Build → Test → Copy)
# ============================================================================
# ============================================================================
# BUILD PIPELINES
# ============================================================================
......
......@@ -291,6 +291,13 @@ jobs:
OPERATOR_TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/kubernetes-operator:${NGC_VERSION_TAG}"
copy_image "${OPERATOR_SOURCE}" "${OPERATOR_TARGET}" "kubernetes-operator:${NGC_VERSION_TAG}"
# ---- Snapshot image ----
echo ""
echo "=== Snapshot Image ==="
SNAPSHOT_SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-snapshot-agent"
SNAPSHOT_TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/snapshot-agent:${NGC_VERSION_TAG}"
copy_image "${SNAPSHOT_SOURCE}" "${SNAPSHOT_TARGET}" "snapshot-agent:${NGC_VERSION_TAG}"
# ---- Summary ----
echo "successful_count=${#SUCCESSFUL_COPIES[@]}" >> $GITHUB_OUTPUT
echo "failed_count=${#FAILED_COPIES[@]}" >> $GITHUB_OUTPUT
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment