Unverified commit 07c7cc87, authored by Dillon Cullinan and committed by GitHub
Browse files

ci: OPS-4223: Add snapshot build to CI (#8159)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
parent d187a14c
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Composite action: lint, test, then build and push the container image for
# one deploy component located under ./deploy/<component>.
#
# Flow: pick platforms for the component -> initialise the buildx builder ->
# log in to registries -> build the `linter` and `tester` Dockerfile targets ->
# (operator only) run `make check` -> build and push the final image under
# every requested tag and list the pushed URIs in the job summary.
name: 'Build Deploy Component'
description: 'Lint, test, and build/push a deploy component container (operator or snapshot)'

inputs:
  # Selects ./deploy/<component> as the working directory and the platform set.
  component:
    description: 'Component to build: operator or snapshot'
    required: true
  # Tag applied to the default ECR image; also echoed back as the action output.
  image_tag:
    description: 'Image tag to apply to built images'
    required: true
  # Forwarded unchanged to the init-dynamo-builder action.
  builder_name:
    description: 'Buildx builder name'
    required: true
  # Region and account id are forwarded to docker-login and are also used to
  # derive the ECR hostname <account>.dkr.ecr.<region>.amazonaws.com.
  aws_default_region:
    description: 'AWS Default Region'
    required: true
  aws_account_id:
    description: 'AWS Account ID'
    required: true
  # The remaining credentials are optional and forwarded to docker-login as-is.
  azure_acr_hostname:
    description: 'Optional Azure ACR hostname for docker login'
    required: false
    default: ''
  azure_acr_user:
    description: 'Optional Azure ACR username for docker login'
    required: false
    default: ''
  azure_acr_password:
    description: 'Optional Azure ACR password for docker login'
    required: false
    default: ''
  ngc_ci_access_token:
    description: 'Optional NGC CI access token for nvcr.io login'
    required: false
    default: ''
  # One fully-qualified image URI per line; blank lines are ignored.
  extra_tags:
    description: 'Optional newline-separated list of fully-qualified extra image URIs to also tag and push'
    required: false
    default: ''

outputs:
  image_tag:
    description: 'The image tag applied to built images'
    value: ${{ inputs.image_tag }}

runs:
  using: composite
  steps:
    # Emits `lint_platform` and `build_platforms` step outputs. Inputs reach
    # the script through env vars, never through inline ${{ }} expansion, so
    # their content cannot be interpreted as shell syntax.
    - name: Determine build settings
      id: settings
      shell: bash
      env:
        COMPONENT: ${{ inputs.component }}
      run: |
        case "${COMPONENT}" in
          operator)
            lint_platform='linux/arm64'
            build_platforms='linux/amd64,linux/arm64'
            ;;
          snapshot)
            lint_platform='linux/amd64'
            build_platforms='linux/amd64'
            ;;
          *)
            # Fail fast instead of silently building an undocumented component.
            echo "::error::Unsupported component '${COMPONENT}' (expected 'operator' or 'snapshot')"
            exit 1
            ;;
        esac
        {
          echo "lint_platform=${lint_platform}"
          echo "build_platforms=${build_platforms}"
        } >> "${GITHUB_OUTPUT}"

    - name: Initialize Dynamo Builder
      uses: ./.github/actions/init-dynamo-builder
      with:
        builder_name: ${{ inputs.builder_name }}
        flavor: general
        arch: ${{ steps.settings.outputs.build_platforms }}

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ inputs.aws_default_region }}
        aws_account_id: ${{ inputs.aws_account_id }}
        azure_acr_hostname: ${{ inputs.azure_acr_hostname }}
        azure_acr_user: ${{ inputs.azure_acr_user }}
        azure_acr_password: ${{ inputs.azure_acr_password }}
        ngc_ci_access_token: ${{ inputs.ngc_ci_access_token }}

    # Builds only the `linter` Dockerfile target; nothing is pushed.
    - name: Linter
      shell: bash
      working-directory: ./deploy/${{ inputs.component }}
      env:
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
        LINT_PLATFORM: ${{ steps.settings.outputs.lint_platform }}
      run: |
        docker buildx build \
          --platform "${LINT_PLATFORM}" \
          --target linter \
          --progress=plain \
          --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" \
          --build-context snapshot=../snapshot \
          .

    # Builds only the `tester` Dockerfile target, on the same platform as the
    # linter; nothing is pushed.
    - name: Tester
      shell: bash
      working-directory: ./deploy/${{ inputs.component }}
      env:
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
        LINT_PLATFORM: ${{ steps.settings.outputs.lint_platform }}
      run: |
        docker buildx build \
          --platform "${LINT_PLATFORM}" \
          --target tester \
          --progress=plain \
          --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" \
          --build-context snapshot=../snapshot \
          .

    # The next four steps run for the operator component only and prepare the
    # Go/Python toolchain used by `make check`.
    - name: Set up Go
      if: ${{ inputs.component == 'operator' }}
      uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
      with:
        go-version: '1.25'

    # NOTE(review): referenced by mutable tag, unlike setup-go above which is
    # pinned to a commit SHA; consider pinning this one the same way.
    - name: Set up Python
      if: ${{ inputs.component == 'operator' }}
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install Python dependencies for operator codegen
      if: ${{ inputs.component == 'operator' }}
      shell: bash
      working-directory: ./deploy/operator
      run: |
        python -m pip install --upgrade pip
        python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"

    # NOTE(review): presumably `make check` fails when generated files differ
    # from what is committed; confirm against deploy/operator/Makefile.
    - name: Check for uncommitted changes
      if: ${{ inputs.component == 'operator' }}
      shell: bash
      working-directory: ./deploy/operator
      run: |
        make check

    # Builds the final image for all build platforms and pushes it under the
    # default ECR URI plus every non-empty line of `extra_tags`.
    - name: Build and push Container
      shell: bash
      working-directory: ./deploy/${{ inputs.component }}
      env:
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
        IMAGE_REGISTRY: ai-dynamo
        IMAGE_REPOSITORY: dynamo
        COMPONENT: ${{ inputs.component }}
        IMAGE_TAG: ${{ inputs.image_tag }}
        EXTRA_TAGS: ${{ inputs.extra_tags }}
        BUILD_PLATFORMS: ${{ steps.settings.outputs.build_platforms }}
      run: |
        ECR_DEFAULT_IMAGE_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
        IMAGE_URIS=("${ECR_DEFAULT_IMAGE_BASE}:${IMAGE_TAG}")

        # Append optional full extra image URIs provided by caller.
        while IFS= read -r extra_image; do
          if [[ -n "${extra_image}" ]]; then
            IMAGE_URIS+=("${extra_image}")
          fi
        done <<< "${EXTRA_TAGS}"

        # Collect flags in an array so each value stays a single argument
        # without relying on unquoted word splitting.
        BUILD_FLAGS=(
          --push
          --platform "${BUILD_PLATFORMS}"
          --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/"
          --build-context snapshot=../snapshot
        )

        # Only the snapshot build selects an explicit Dockerfile target.
        if [[ "${COMPONENT}" == "snapshot" ]]; then
          BUILD_FLAGS+=(--target agent)
        fi

        TAGGING_FLAGS=()
        for image_uri in "${IMAGE_URIS[@]}"; do
          TAGGING_FLAGS+=(-t "${image_uri}")
        done
        echo "flags for docker buildx: ${TAGGING_FLAGS[*]}"

        docker buildx build "${BUILD_FLAGS[@]}" "${TAGGING_FLAGS[@]}" -f Dockerfile .

        # Job summary: a one-column table of every pushed image URI; the
        # heading uses the component name with its first letter upper-cased.
        DISPLAY_NAME="${COMPONENT}"
        {
          echo "### 🐳 ${DISPLAY_NAME^} Container Images"
          echo ""
          echo "| Image URI |"
          echo "|-----|"
          for image_uri in "${IMAGE_URIS[@]}"; do
            echo "| \`${image_uri}\` |"
          done
        } >> "${GITHUB_STEP_SUMMARY}"
...@@ -207,100 +207,49 @@ jobs: ...@@ -207,100 +207,49 @@ jobs:
operator: operator:
name: Operator name: Operator
runs-on: prod-default-v2 runs-on: prod-default-v2
env:
IMAGE_REGISTRY: ai-dynamo
IMAGE_REPOSITORY: dynamo
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
outputs: outputs:
operator_default_tag: ${{ steps.build-and-push-image.outputs.operator_default_tag }} operator_default_tag: ${{ steps.build.outputs.image_tag }}
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Initialize Dynamo Builder - name: Build and push operator
uses: ./.github/actions/init-dynamo-builder id: build
with: uses: ./.github/actions/build-deploy-component
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
flavor: general
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
with: with:
component: operator
image_tag: ${{ github.sha }}-operator
builder_name: ${{ needs.changed-files.outputs.builder_name }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }} azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }} azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Linter extra_tags: |
shell: bash ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-operator
working-directory: ./deploy/operator
run: |
docker buildx build --platform linux/arm64 --target linter --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot .
- name: Tester
shell: bash
working-directory: ./deploy/operator
run: |
docker buildx build --platform linux/arm64 --target tester --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot .
- name: Set up Go
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
with:
go-version: '1.25'
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Python dependencies for operator codegen
shell: bash
working-directory: ./deploy/operator
run: |
python -m pip install --upgrade pip
python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"
- name: Check for uncommitted changes
shell: bash
working-directory: ./deploy/operator
run: |
make check
- name: Build and push Container
id: build-and-push-image
shell: bash
working-directory: ./deploy/operator
env:
NO_CACHE_FLAG: '' # placeholder for future logic to add no cache flag if needed
run: |
ECR_DEFAULT_IMAGE_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
DEFAULT_TAG="${{ github.sha }}-operator"
ACR_IMAGE_BASE="${{ secrets.AZURE_ACR_HOSTNAME }}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
IMAGE_URIS=(
"${ECR_DEFAULT_IMAGE_BASE}:${DEFAULT_TAG}"
"${ACR_IMAGE_BASE}:${DEFAULT_TAG}"
)
if [[ "${{ github.ref_name }}" == "main" ]]; then
IMAGE_URIS+=(
"${ECR_DEFAULT_IMAGE_BASE}:main-operator"
"${ACR_IMAGE_BASE}:main-operator"
)
fi
echo "operator_default_tag=${DEFAULT_TAG}" >> $GITHUB_OUTPUT # ============================================================================
TAGGING_FLAGS=$(printf -- '-t %s ' "${IMAGE_URIS[@]}") # Snapshot Agent
echo "flags for docker buildx: ${TAGGING_FLAGS}" # ============================================================================
if [[ "$NO_CACHE_FLAG" == "true" ]]; then snapshot-agent:
NO_CACHE_FLAG="--no-cache" name: Snapshot Agent
fi runs-on: prod-default-v2
docker buildx build --push ${NO_CACHE_FLAG} \ steps:
--platform linux/amd64,linux/arm64 \ - name: Checkout code
--build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ \ uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
--build-context snapshot=../snapshot \ - name: Build and push snapshot agent
${TAGGING_FLAGS} -f Dockerfile . uses: ./.github/actions/build-deploy-component
with:
echo "### 🐳 Operator Container Images" >> $GITHUB_STEP_SUMMARY component: snapshot
echo "" >> $GITHUB_STEP_SUMMARY image_tag: ${{ github.sha }}-snapshot-agent
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY builder_name: ${{ needs.changed-files.outputs.builder_name }}
echo "|-----|" >> $GITHUB_STEP_SUMMARY aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
for image_uri in "${IMAGE_URIS[@]}"; do aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
echo "| \`${image_uri}\` |" >> $GITHUB_STEP_SUMMARY azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
done azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
extra_tags: |
${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-snapshot-agent
# ============================================================================ # ============================================================================
# DEPLOYMENT JOBS # DEPLOYMENT JOBS
......
...@@ -51,6 +51,7 @@ jobs: ...@@ -51,6 +51,7 @@ jobs:
needs: needs:
- changed-files - changed-files
- operator - operator
- snapshot-agent
- vllm-build - vllm-build
- vllm-dev-build - vllm-dev-build
- vllm-test - vllm-test
...@@ -83,99 +84,60 @@ jobs: ...@@ -83,99 +84,60 @@ jobs:
run: | run: |
echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped", "cancelled"] | any($result == .))' echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped", "cancelled"] | any($result == .))'
# ============================================================================
# Operator
# ============================================================================
operator: operator:
needs: changed-files needs: changed-files
if: needs.changed-files.outputs.operator == 'true' if: needs.changed-files.outputs.operator == 'true'
name: Operator name: Operator
runs-on: prod-default-v2 runs-on: prod-default-v2
env:
IMAGE_REGISTRY: ai-dynamo
IMAGE_REPOSITORY: dynamo
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
outputs: outputs:
operator_default_tag: ${{ steps.build-and-push-image.outputs.operator_default_tag }} operator_default_tag: ${{ steps.build.outputs.image_tag }}
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Initialize Dynamo Builder - name: Build and push operator
uses: ./.github/actions/init-dynamo-builder id: build
uses: ./.github/actions/build-deploy-component
with: with:
component: operator
image_tag: ${{ github.sha }}-operator
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
with:
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }} azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }} azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Linter extra_tags: |
shell: bash ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-operator
working-directory: ./deploy/operator
run: |
docker buildx build --platform linux/arm64 --target linter --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot . # ============================================================================
- name: Tester # Snapshot Agent
shell: bash # ============================================================================
working-directory: ./deploy/operator
run: | snapshot-agent:
docker buildx build --platform linux/arm64 --target tester --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ --build-context snapshot=../snapshot . needs: changed-files
- name: Set up Go if: needs.changed-files.outputs.deploy == 'true'
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0 name: Snapshot Agent
with: runs-on: prod-default-v2
go-version: '1.25' steps:
- name: Set up Python - name: Checkout code
uses: actions/setup-python@v5 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Build and push snapshot agent
uses: ./.github/actions/build-deploy-component
with: with:
python-version: "3.11" component: snapshot
- name: Install Python dependencies for operator codegen image_tag: ${{ github.sha }}-snapshot-agent
shell: bash builder_name: ${{ needs.changed-files.outputs.builder_name }}
working-directory: ./deploy/operator aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
run: | aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
python -m pip install --upgrade pip
python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"
- name: Check for uncommitted changes
shell: bash
working-directory: ./deploy/operator
run: |
make check
- name: Build and push Container
id: build-and-push-image
shell: bash
working-directory: ./deploy/operator
env:
NO_CACHE_FLAG: '' # placeholder for future logic to add no cache flag if needed
run: |
ECR_DEFAULT_IMAGE_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
DEFAULT_TAG="${{ github.sha }}-operator"
ACR_IMAGE_BASE="${{ secrets.AZURE_ACR_HOSTNAME }}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
IMAGE_URIS=(
"${ECR_DEFAULT_IMAGE_BASE}:${DEFAULT_TAG}"
"${ACR_IMAGE_BASE}:${DEFAULT_TAG}"
)
echo "operator_default_tag=${DEFAULT_TAG}" >> $GITHUB_OUTPUT
TAGGING_FLAGS=$(printf -- '-t %s ' "${IMAGE_URIS[@]}")
echo "flags for docker buildx: ${TAGGING_FLAGS}"
if [[ "$NO_CACHE_FLAG" == "true" ]]; then
NO_CACHE_FLAG="--no-cache"
fi
docker buildx build --push ${NO_CACHE_FLAG} \
--platform linux/amd64,linux/arm64 \
--build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ \
--build-context snapshot=../snapshot \
${TAGGING_FLAGS} -f Dockerfile .
echo "### 🐳 Operator Container Images" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
echo "|-----|" >> $GITHUB_STEP_SUMMARY
for image_uri in "${IMAGE_URIS[@]}"; do
echo "| \`${image_uri}\` |" >> $GITHUB_STEP_SUMMARY
done
# ============================================================================
# FRAMEWORK PIPELINES (Build → Test → Copy)
# ============================================================================
# ============================================================================ # ============================================================================
# BUILD PIPELINES # BUILD PIPELINES
# ============================================================================ # ============================================================================
......
...@@ -291,6 +291,13 @@ jobs: ...@@ -291,6 +291,13 @@ jobs:
OPERATOR_TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/kubernetes-operator:${NGC_VERSION_TAG}" OPERATOR_TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/kubernetes-operator:${NGC_VERSION_TAG}"
copy_image "${OPERATOR_SOURCE}" "${OPERATOR_TARGET}" "kubernetes-operator:${NGC_VERSION_TAG}" copy_image "${OPERATOR_SOURCE}" "${OPERATOR_TARGET}" "kubernetes-operator:${NGC_VERSION_TAG}"
# ---- Snapshot image ----
echo ""
echo "=== Snapshot Image ==="
SNAPSHOT_SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-snapshot-agent"
SNAPSHOT_TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/snapshot-agent:${NGC_VERSION_TAG}"
copy_image "${SNAPSHOT_SOURCE}" "${SNAPSHOT_TARGET}" "snapshot-agent:${NGC_VERSION_TAG}"
# ---- Summary ---- # ---- Summary ----
echo "successful_count=${#SUCCESSFUL_COPIES[@]}" >> $GITHUB_OUTPUT echo "successful_count=${#SUCCESSFUL_COPIES[@]}" >> $GITHUB_OUTPUT
echo "failed_count=${#FAILED_COPIES[@]}" >> $GITHUB_OUTPUT echo "failed_count=${#FAILED_COPIES[@]}" >> $GITHUB_OUTPUT
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment