Unverified commit 20eee509, authored by Ran Rubin, committed by GitHub
Browse files

ci: convert framework image pipelines to single multi-arch manifest builds (#7399)


Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
parent b2aefc53
......@@ -8,7 +8,7 @@ inputs:
description: 'Target stage for Docker rendering'
required: true
platform:
description: 'Platform to build (amd64 or arm64)'
description: 'Docker platform string (e.g. linux/amd64, linux/amd64,linux/arm64)'
required: true
cuda_version:
description: 'CUDA version to build (e.g., 12.9, 13.0)'
......@@ -68,15 +68,7 @@ inputs:
required: false
default: 'false'
extra_tags:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)'
required: false
default: ''
build_only:
description: 'Build and push only controls extra tag logic'
required: false
default: 'false'
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images'
description: 'Additional tags (newline-separated)'
required: false
default: ''
show_summary:
......@@ -121,8 +113,8 @@ runs:
fi
TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}"
TEST_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}-test"
DEFAULT_TARGET_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}"
TEST_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}"
DEFAULT_TARGET_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}"
TEST_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}-cuda${CUDA_VERSION}"
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "test_image_uri=${TEST_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
......@@ -135,7 +127,7 @@ runs:
flavor: ${{ inputs.framework }}
arch: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }}
- name: Calculate extra tags with platform suffix
- name: Calculate extra tags
id: extra-tags
shell: bash
env:
......@@ -149,20 +141,10 @@ runs:
if [ -n "$EXTRA_TAGS" ]; then
while IFS= read -r tag; do
if [ -n "$tag" ]; then
RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}-cuda${CUDA_VERSION_MAJOR}"$'\n'
fi
done <<< "$EXTRA_TAGS"
fi
if [ "${{ inputs.build_only }}" == "true" ]; then
if [ -z "${{ inputs.sanitized_ref_name }}" ]; then
echo "::warning::sanitized_ref_name is empty but build_only is true; skipping branch tags"
else
BRANCH_TAG="${{ inputs.sanitized_ref_name }}-${{ inputs.framework }}"
RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${BRANCH_TAG}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
RESULT+="${ACR_REGISTRY}/ai-dynamo/dynamo:${BRANCH_TAG}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
RESULT+="${ACR_REGISTRY}/ai-dynamo/dynamo:${{ steps.calculate-target-tag.outputs.target_tag_plain }}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
fi
fi
if [ -n "$RESULT" ]; then
echo "tags<<EOF" >> $GITHUB_OUTPUT
echo "$RESULT" >> $GITHUB_OUTPUT
......@@ -236,7 +218,7 @@ runs:
ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
run: |
CUDA_MAJOR=${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
CACHE_TAG="test-${{ inputs.framework }}-cuda${CUDA_MAJOR}-${{ inputs.platform }}-cache"
CACHE_TAG="test-${{ inputs.framework }}-cuda${CUDA_MAJOR}-cache"
CACHE_ARGS="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${CACHE_TAG}"
CACHE_ARGS+=" --cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${CACHE_TAG}"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
......@@ -261,7 +243,7 @@ runs:
--progress=plain \
${PUSH_ARGS} \
${NO_CACHE_ARG} \
--platform linux/${{ inputs.platform }} \
--platform ${{ inputs.platform }} \
-f container/Dockerfile.test \
--build-arg BASE_IMAGE=${{ steps.calculate-target-tag.outputs.default_target_image_uri }} \
${CACHE_ARGS} \
......@@ -270,7 +252,7 @@ runs:
shell: bash
if: ${{ inputs.push_image == 'true' && inputs.show_summary == 'true' }}
run: |
echo "### 🐳 ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }} Default Image" >> $GITHUB_STEP_SUMMARY
echo "### 🐳 ${{ inputs.framework }}-cuda${{ inputs.cuda_version }} Default Image" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
echo "|-----|" >> $GITHUB_STEP_SUMMARY
......
......@@ -22,17 +22,13 @@ inputs:
description: 'Buildkit flavor used to route workers on re-init (vllm, sglang, trtllm, general)'
required: true
arch:
description: 'Target architecture used on re-init (amd64, arm64)'
description: 'Docker platform string used on re-init (e.g. linux/amd64, linux/arm64, linux/amd64,linux/arm64)'
required: false
default: 'amd64'
default: 'linux/amd64'
cuda_version:
description: 'CUDA version used on re-init (e.g. 12.9, 13.0). Leave empty for general flavor.'
required: false
default: ''
all_arch:
description: 'If true, re-initialize builder for both amd64 and arm64 architectures'
required: false
default: 'false'
# Kubernetes fallback passthrough inputs (forwarded to init-dynamo-builder)
ephemeral_storage:
......@@ -90,7 +86,6 @@ runs:
builder_name: ${{ inputs.builder_name }}
flavor: ${{ inputs.flavor }}
arch: ${{ inputs.arch }}
all_arch: ${{ inputs.all_arch }}
cuda_version: ${{ inputs.cuda_version }}
ephemeral_storage: ${{ inputs.ephemeral_storage }}
namespace: ${{ inputs.namespace }}
......
......@@ -12,9 +12,9 @@ inputs:
description: 'Name for the uploaded artifact (e.g., compliance-vllm-cuda12-amd64)'
required: true
arch:
description: 'Target architecture (amd64, arm64)'
description: 'Docker platform string (e.g. linux/amd64, linux/arm64)'
required: false
default: 'amd64'
default: 'linux/amd64'
framework:
description: 'Framework name for base image resolution (vllm, sglang, trtllm, dynamo)'
required: false
......@@ -78,7 +78,7 @@ runs:
mkdir -p /tmp/compliance-target
docker buildx build \
--builder ${{ steps.builder.outputs.name }} \
--platform linux/${{ inputs.arch }} \
--platform ${{ inputs.arch }} \
--build-arg TARGET_IMAGE="${{ inputs.image }}" \
--output "type=local,dest=/tmp/compliance-target" \
--pull \
......@@ -102,7 +102,7 @@ runs:
mkdir -p /tmp/compliance-base
docker buildx build \
--builder ${{ steps.builder.outputs.name }} \
--platform linux/${{ inputs.arch }} \
--platform ${{ inputs.arch }} \
--build-arg TARGET_IMAGE="${{ steps.resolve-base.outputs.base_image }}" \
--output "type=local,dest=/tmp/compliance-base" \
--pull \
......
......@@ -11,8 +11,8 @@ description: 'Route buildkit workers and bootstrap buildx builder for dynamo bui
# 4. Falls back to Kubernetes driver if no remote workers are available
#
# Architecture modes:
# - Single arch: Set arch to 'amd64' or 'arm64' to initialize for one architecture
# - Multi arch: Set all_arch to 'true' to initialize for both amd64 and arm64
# - Single arch: Set arch to 'linux/amd64' or 'linux/arm64'
# - Multi arch: Set arch to 'linux/amd64,linux/arm64'
#
# Flavor routing:
# BuildKit pods are assigned to flavors based on pod index modulo 3:
......@@ -26,14 +26,14 @@ description: 'Route buildkit workers and bootstrap buildx builder for dynamo bui
# with:
# builder_name: my-builder
# flavor: general
# all_arch: 'true'
# arch: 'linux/amd64,linux/arm64'
#
# # Initialize for single architecture with specific flavor and CUDA version:
# - uses: ./.github/actions/init-dynamo-builder
# with:
# builder_name: my-builder
# flavor: vllm
# arch: amd64
# arch: 'linux/amd64'
# cuda_version: '12.9'
inputs:
......@@ -45,13 +45,9 @@ inputs:
required: false
default: 'general'
arch:
description: 'Target architecture (amd64, arm64). Ignored if all_arch is true.'
description: 'Docker platform string: linux/amd64, linux/arm64, or linux/amd64,linux/arm64'
required: false
default: 'amd64'
all_arch:
description: 'If true, initialize builder for both amd64 and arm64 architectures'
required: false
default: 'false'
default: 'linux/amd64'
cuda_version:
description: 'CUDA version (12.9, 13.0). Optional for general flavor.'
required: false
......@@ -99,22 +95,29 @@ runs:
if [[ -n "${{ inputs.cuda_version }}" ]]; then
CUDA_ARG="--cuda ${{ inputs.cuda_version }}"
fi
if [[ "${{ inputs.all_arch }}" == "true" ]]; then
echo "running with --arch all --flavor ${{ inputs.flavor }} $CUDA_ARG"
.github/scripts/route_buildkit.sh --arch all --flavor ${{ inputs.flavor }} $CUDA_ARG
# Strip linux/ prefix (e.g. linux/amd64,linux/arm64 → amd64,arm64)
ARCH="${{ inputs.arch }}"
ARCH="${ARCH//linux\//}"
if [[ "$ARCH" == *","* ]]; then
ROUTE_ARCH="all"
else
echo "running with --arch ${{ inputs.arch }} --flavor ${{ inputs.flavor }} $CUDA_ARG"
.github/scripts/route_buildkit.sh --arch ${{ inputs.arch }} --flavor ${{ inputs.flavor }} $CUDA_ARG
ROUTE_ARCH="$ARCH"
fi
echo "running with --arch ${ROUTE_ARCH} --flavor ${{ inputs.flavor }} $CUDA_ARG"
.github/scripts/route_buildkit.sh --arch ${ROUTE_ARCH} --flavor ${{ inputs.flavor }} $CUDA_ARG
- name: Prepare worker addresses and platform
id: prepare
shell: bash
env:
AMD64_ADDRS: ${{ steps.route-buildkit.outputs[format('{0}_amd64', inputs.flavor)] }}
ARM64_ADDRS: ${{ steps.route-buildkit.outputs[format('{0}_arm64', inputs.flavor)] }}
run: |
if [[ "${{ inputs.all_arch }}" == "true" ]]; then
# Combine both architecture outputs for multi-arch builds
AMD64_ADDRS="${{ steps.route-buildkit.outputs[format('{0}_amd64', inputs.flavor)] }}"
ARM64_ADDRS="${{ steps.route-buildkit.outputs[format('{0}_arm64', inputs.flavor)] }}"
# Strip linux/ prefix (e.g. linux/amd64,linux/arm64 → amd64,arm64)
ARCH="${{ inputs.arch }}"
ARCH="${ARCH//linux\//}"
if [[ "$ARCH" == *","* ]]; then
# Multi-arch: combine both worker pools
if [[ -n "$AMD64_ADDRS" && -n "$ARM64_ADDRS" ]]; then
echo "worker_addresses=${AMD64_ADDRS},${ARM64_ADDRS}" >> "$GITHUB_OUTPUT"
elif [[ -n "$AMD64_ADDRS" ]]; then
......@@ -124,9 +127,10 @@ runs:
else
echo "worker_addresses=" >> "$GITHUB_OUTPUT"
fi
elif [[ "$ARCH" == "arm64" ]]; then
echo "worker_addresses=${ARM64_ADDRS}" >> "$GITHUB_OUTPUT"
else
# Single architecture build
echo "worker_addresses=${{ steps.route-buildkit.outputs[format('{0}_{1}', inputs.flavor, inputs.arch)] }}" >> "$GITHUB_OUTPUT"
echo "worker_addresses=${AMD64_ADDRS}" >> "$GITHUB_OUTPUT"
fi
- name: Bootstrap buildkit
......
......@@ -53,6 +53,10 @@ inputs:
target_azure_acr_password:
description: 'Azure ACR password for target registry'
required: false
# Optional single-architecture filter for the copy. When non-empty, the value is
# handed to skopeo as `--override-arch <value>`; when empty, the copy runs with
# `--all` instead.
override_arch:
description: 'Copy only the specified architecture (e.g. amd64). If empty, copies all platforms (manifest list).'
required: false
default: ''
outputs:
target_image_ref:
......@@ -109,7 +113,13 @@ runs:
RETRY_DELAY=10
for attempt in $(seq 1 $MAX_RETRIES); do
echo "Attempt ${attempt}/${MAX_RETRIES}..."
if skopeo copy --all --retry-times 4 "${SOURCE_REF}" "${TARGET_REF}"; then
# Choose the skopeo copy mode: restrict the copy to one architecture when
# override_arch is provided, otherwise copy every platform with --all.
ARCH_FLAG=""
if [ -n "${{ inputs.override_arch }}" ]; then
ARCH_FLAG="--override-arch ${{ inputs.override_arch }}"
else
ARCH_FLAG="--all"
fi
if skopeo copy ${ARCH_FLAG} --retry-times 4 "${SOURCE_REF}" "${TARGET_REF}"; then
echo "target_image_ref=${{ inputs.target_registry }}/${TARGET_IMAGE}:${TARGET_TAG}" >> $GITHUB_OUTPUT
echo "✅ Image copied successfully"
exit 0
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
name: Build Framework Image Matrix
name: Build Framework Image
on:
workflow_call:
......@@ -14,10 +14,11 @@ on:
description: 'Target stage for Docker rendering'
required: true
type: string
platforms:
description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
required: true
platform:
description: 'Docker platform(s) to build (e.g. linux/amd64,linux/arm64)'
required: false
type: string
default: 'linux/amd64,linux/arm64'
cuda_versions:
description: 'CUDA versions to build (JSON array, e.g., ["12.9", "13.0"])'
required: true
......@@ -27,7 +28,7 @@ on:
required: true
type: string
build_timeout_minutes:
description: 'Timeout in minutes for the build step'
description: 'Timeout in minutes for the build job'
required: false
type: number
default: 60
......@@ -36,13 +37,8 @@ on:
required: false
type: boolean
default: false
extra_tags:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)'
required: false
type: string
default: ''
make_efa:
description: 'Enable AWS EFA support in the build'
copy_to_acr:
description: 'Also push the image to ACR'
required: false
type: boolean
default: false
......@@ -51,31 +47,26 @@ on:
required: false
type: boolean
default: false
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images'
extra_tags:
description: 'Additional tags (newline-separated)'
required: false
type: string
default: ''
build_only:
description: 'Build and push only enables branch-tagged images'
required: false
type: boolean
default: false
run_compliance_scan:
description: 'Run compliance scan after build'
required: false
type: boolean
default: false
copy_to_acr:
description: 'Copy the built image from ECR to ACR using skopeo after the build'
required: false
type: boolean
default: false
copy_timeout_minutes:
description: 'Timeout in minutes for the copy to ACR step'
required: false
type: number
default: 10
make_efa:
description: 'Enable AWS EFA support in the build'
required: false
type: boolean
default: false
secrets:
AWS_DEFAULT_REGION:
required: true
......@@ -98,26 +89,25 @@ on:
jobs:
build:
name: Build cuda${{ matrix.cuda_version }}-${{ matrix.platform }}
name: Build cuda${{ matrix.cuda_version }}
runs-on: prod-builder-v3
timeout-minutes: ${{ inputs.build_timeout_minutes }}
strategy:
fail-fast: false
matrix:
platform: ${{ fromJson(inputs.platforms) }}
cuda_version: ${{ fromJson(inputs.cuda_versions) }}
steps:
- name: Checkout repository
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
lfs: true
- name: Build
- name: Build Flavor
id: build
uses: ./.github/actions/build-flavor
with:
framework: ${{ inputs.framework }}
target: ${{ inputs.target }}
platform: ${{ matrix.platform }}
platform: ${{ inputs.platform }}
cuda_version: ${{ matrix.cuda_version }}
builder_name: ${{ inputs.builder_name }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
......@@ -132,19 +122,9 @@ jobs:
build_timeout_minutes: ${{ inputs.build_timeout_minutes }}
push_image: ${{ inputs.push_image }}
no_cache: ${{ inputs.no_cache }}
make_efa: ${{ inputs.make_efa }}
extra_tags: ${{ inputs.extra_tags }}
sanitized_ref_name: ${{ inputs.sanitized_ref_name }}
build_only: ${{ inputs.build_only }}
show_summary: ${{ inputs.push_image }}
- name: Refresh BuildKit builder
if: ${{ inputs.target != 'dev' }}
uses: ./.github/actions/builder-refresher
with:
builder_name: ${{ inputs.builder_name }}
flavor: ${{ inputs.framework }}
arch: ${{ matrix.platform }}
cuda_version: ${{ matrix.cuda_version }}
make_efa: ${{ inputs.make_efa }}
- name: Copy image to ACR
if: inputs.copy_to_acr
timeout-minutes: ${{ inputs.copy_timeout_minutes }}
......@@ -152,15 +132,16 @@ jobs:
with:
source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
source_image: ai-dynamo/dynamo
source_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}-${{ matrix.platform }}
source_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}
target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_image: ai-dynamo/dynamo
target_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}-${{ matrix.platform }}
target_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}
source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
override_arch: amd64 # We are using AMD64 images only on the rest of the clusters.
- name: Calculate compliance image URI
id: compliance-image
if: inputs.run_compliance_scan
......@@ -168,20 +149,25 @@ jobs:
run: |
CUDA_MAJOR="${{ matrix.cuda_version }}"
CUDA_MAJOR="${CUDA_MAJOR%%.*}"
EFA_SUFFIX=""
if [ "${{ inputs.make_efa }}" == "true" ]; then
EFA_SUFFIX="-efa"
fi
TARGET_TAG="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}"
IMAGE="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG}-cuda${CUDA_MAJOR}-${{ matrix.platform }}"
# Reuse the tag emitted by the build step so this URI always matches the image
# that was actually pushed, including the "-efa" suffix added when make_efa is true.
TARGET_TAG="${{ steps.build.outputs.target_tag_plain }}"
IMAGE="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG}-cuda${CUDA_MAJOR}"
echo "runtime_image=${IMAGE}" >> $GITHUB_OUTPUT
echo "cuda_major=${CUDA_MAJOR}" >> $GITHUB_OUTPUT
- name: Compliance scan
if: inputs.run_compliance_scan
- name: Compliance scan (amd64)
if: inputs.run_compliance_scan && contains(inputs.platform, 'amd64')
uses: ./.github/actions/compliance-scan
with:
image: ${{ steps.compliance-image.outputs.runtime_image }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}-cuda${{ steps.compliance-image.outputs.cuda_major }}-amd64
arch: linux/amd64
framework: ${{ inputs.framework }}
cuda_version: ${{ matrix.cuda_version }}
- name: Compliance scan (arm64)
if: inputs.run_compliance_scan && contains(inputs.platform, 'arm64')
uses: ./.github/actions/compliance-scan
with:
image: ${{ steps.compliance-image.outputs.runtime_image }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-cuda${{ steps.compliance-image.outputs.cuda_major }}-${{ matrix.platform }}
arch: ${{ matrix.platform }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}-cuda${{ steps.compliance-image.outputs.cuda_major }}-arm64
arch: linux/arm64
framework: ${{ inputs.framework }}
cuda_version: ${{ matrix.cuda_version }}
......@@ -91,7 +91,7 @@ jobs:
with:
builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general
all_arch: 'true'
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
with:
......@@ -157,6 +157,7 @@ jobs:
echo "epp_image_uri: ${{ steps.calculate-target-tag.outputs.epp_image_uri }}"
echo "push_image: true"
- name: Build Frontend Container
id: build-image
timeout-minutes: 30
......
......@@ -52,46 +52,49 @@ jobs:
vllm-pipeline:
needs: [init]
if: inputs.build_vllm
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: vllm
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.init.outputs.builder_name }}
build_timeout_minutes: 60
build_only: true
sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }}
push_image: true
run_compliance_scan: false
copy_to_acr: true
secrets: inherit
sglang-pipeline:
needs: [init]
if: inputs.build_sglang
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: sglang
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.init.outputs.builder_name }}
build_timeout_minutes: 60
build_only: true
sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }}
push_image: true
run_compliance_scan: false
copy_to_acr: true
secrets: inherit
trtllm-pipeline:
needs: [init]
if: inputs.build_trtllm
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: trtllm
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]'
builder_name: ${{ needs.init.outputs.builder_name }}
build_timeout_minutes: 60
build_only: true
sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }}
push_image: true
run_compliance_scan: false
copy_to_acr: true
secrets: inherit
# ============================================================================
......@@ -115,7 +118,7 @@ jobs:
with:
builder_name: ${{ needs.init.outputs.builder_name }}
flavor: general
all_arch: 'true'
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
with:
......
......@@ -14,10 +14,11 @@ on:
description: 'Target stage for Docker rendering'
required: true
type: string
platforms:
description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
required: true
platform:
description: 'Docker platform(s) to build (e.g. linux/amd64,linux/arm64)'
required: false
type: string
default: 'linux/amd64,linux/arm64'
cuda_versions:
description: 'CUDA versions to build (JSON array, e.g., ["12.9", "13.0"])'
required: true
......@@ -84,7 +85,7 @@ on:
required: true
type: string
extra_tags:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)'
description: 'Additional tags (newline-separated)'
required: false
type: string
default: ''
......@@ -113,16 +114,6 @@ on:
required: false
type: boolean
default: false
build_only:
description: 'Build and push only skip all tests, show summary'
required: false
type: boolean
default: false
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images (used with build_only)'
required: false
type: string
default: ''
secrets:
AWS_DEFAULT_REGION:
required: true
......@@ -148,7 +139,6 @@ jobs:
strategy:
fail-fast: false
matrix:
platform: ${{ fromJson(inputs.platforms) }}
cuda_version: ${{ fromJson(inputs.cuda_versions) }}
# This name weirdly shows in the checks overview, but not once you dive into
# a specific workflow. Keeping it as a short placeholder
......@@ -156,13 +146,13 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor.yml
with:
framework: ${{ inputs.framework }}
platform: ${{ matrix.platform }}
platform: ${{ inputs.platform }}
target: ${{ inputs.target }}
cuda_version: ${{ matrix.cuda_version }}
extra_tags: ${{ inputs.extra_tags }}
no_cache: ${{ inputs.no_cache }}
builder_name: ${{ inputs.builder_name }}
build_image: ${{ inputs.build_only || inputs.build_image }}
build_image: ${{ inputs.build_image }}
build_timeout_minutes: ${{ inputs.build_timeout_minutes }}
push_image: ${{ inputs.push_image }}
run_cpu_only_tests: ${{ inputs.run_cpu_only_tests }}
......@@ -174,10 +164,8 @@ jobs:
run_multi_gpu_tests: ${{ inputs.run_multi_gpu_tests }}
multi_gpu_test_markers: ${{ inputs.multi_gpu_test_markers }}
multi_gpu_test_timeout_minutes: ${{ inputs.multi_gpu_test_timeout_minutes }}
copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
copy_to_acr: ${{ inputs.copy_to_acr }}
copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }}
make_efa: ${{ inputs.make_efa }}
show_summary: ${{ inputs.build_only || inputs.show_summary }}
build_only: ${{ inputs.build_only }}
sanitized_ref_name: ${{ inputs.sanitized_ref_name }}
show_summary: ${{ inputs.show_summary }}
secrets: inherit
......@@ -15,7 +15,7 @@ on:
required: true
type: string
platform:
description: 'Platform to build (amd64 or arm64)'
description: 'Docker platform(s) to build (e.g. linux/amd64,linux/arm64)'
required: true
type: string
cuda_version:
......@@ -84,7 +84,7 @@ on:
required: true
type: string
extra_tags:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)'
description: 'Additional tags (newline-separated)'
required: false
type: string
default: ''
......@@ -118,16 +118,6 @@ on:
required: false
type: boolean
default: false
build_only:
description: 'Build and push only skip all tests, show summary'
required: false
type: boolean
default: false
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images (used with build_only)'
required: false
type: string
default: ''
secrets:
AWS_DEFAULT_REGION:
required: true
......@@ -153,12 +143,14 @@ jobs:
# ============================================================================
build:
if: inputs.build_image
name: Build cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
name: Build cuda${{ inputs.cuda_version }}
runs-on: prod-builder-v3
timeout-minutes: ${{ inputs.build_timeout_minutes }}
outputs:
target_tag_plain: ${{ steps.build.outputs.target_tag_plain }}
test_tag_plain: ${{ steps.build.outputs.test_tag_plain }}
compliance_arches: ${{ steps.compliance-arches.outputs.arches }}
test_runners: ${{ steps.test-runners.outputs.runners }}
env:
FRAMEWORK: ${{ inputs.framework }}
steps:
......@@ -166,6 +158,32 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
lfs: true
- name: Compute compliance arches
  id: compliance-arches
  shell: bash
  env:
    # Hand the workflow input to the script through the environment instead of
    # interpolating it into the script text, so its contents are never parsed
    # as shell code.
    PLATFORM: ${{ inputs.platform }}
  run: |
    # Convert the comma-separated Docker platform string to a compact JSON array:
    #   "linux/amd64,linux/arm64" -> ["linux/amd64","linux/arm64"]
    #   "linux/amd64"             -> ["linux/amd64"]
    # The expansion is intentionally unquoted: commas become spaces and word
    # splitting feeds one platform per line to jq; empty entries are dropped.
    JSON=$(printf '%s\n' ${PLATFORM//,/ } | jq -Rsc '[splits("\n") | select(length>0)]')
    echo "arches=${JSON}" >> "$GITHUB_OUTPUT"
- name: Compute test runners
  id: test-runners
  shell: bash
  env:
    # Hand the workflow input to the script through the environment instead of
    # interpolating it into the script text, so its contents are never parsed
    # as shell code.
    PLATFORM: ${{ inputs.platform }}
  run: |
    # Emit a JSON array of {arch, runner} objects for the platforms being built:
    #   "linux/amd64,linux/arm64" -> both runners
    #   "linux/arm64"             -> arm64 runner only
    #   anything else             -> amd64 runner only (covers EFA and other single-arch builds)
    if [[ "$PLATFORM" == *"amd64"* && "$PLATFORM" == *"arm64"* ]]; then
      echo 'runners=[{"arch":"amd64","runner":"prod-tester-amd-gpu-v1"},{"arch":"arm64","runner":"prod-tester-arm-v1"}]' >> "$GITHUB_OUTPUT"
    elif [[ "$PLATFORM" == *"arm64"* ]]; then
      echo 'runners=[{"arch":"arm64","runner":"prod-tester-arm-v1"}]' >> "$GITHUB_OUTPUT"
    else
      echo 'runners=[{"arch":"amd64","runner":"prod-tester-amd-gpu-v1"}]' >> "$GITHUB_OUTPUT"
    fi
- name: Build
id: build
uses: ./.github/actions/build-flavor
......@@ -190,23 +208,23 @@ jobs:
no_cache: ${{ inputs.no_cache }}
make_efa: ${{ inputs.make_efa }}
extra_tags: ${{ inputs.extra_tags }}
build_only: ${{ inputs.build_only }}
sanitized_ref_name: ${{ inputs.sanitized_ref_name }}
show_summary: ${{ inputs.push_image && inputs.show_summary }}
# ============================================================================
# TEST
# ============================================================================
test:
if: |
!inputs.build_only &&
( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) &&
inputs.build_image
needs: [build]
name: Test cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
runs-on: ${{ inputs.platform == 'amd64' && 'prod-tester-amd-gpu-v1' || 'prod-tester-arm-v1' }}
name: Test cuda${{ inputs.cuda_version }} (${{ matrix.arch }})
strategy:
fail-fast: false
matrix:
include: ${{ fromJson(needs.build.outputs.test_runners) }}
runs-on: ${{ matrix.runner }}
env:
FRAMEWORK: ${{ inputs.framework }}
steps:
......@@ -219,9 +237,9 @@ jobs:
CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }}
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }}
TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}
echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT
- name: Docker Login
uses: ./.github/actions/docker-login
......@@ -273,7 +291,7 @@ jobs:
pytest_marks: ${{ inputs.cpu_only_test_markers }}
framework: ${{ inputs.framework }}
test_type: "pre_merge_cpu"
platform_arch: ${{ inputs.platform }}
platform_arch: ${{ matrix.arch }}
hf_token: ${{ secrets.HF_TOKEN }}
parallel_mode: 'auto'
dind_as_sidecar: 'true'
......@@ -282,14 +300,14 @@ jobs:
# These are e2e tests marked with gpu_1 that require GPU hardware
- name: Run GPU tests (sequential)
timeout-minutes: ${{ inputs.single_gpu_test_timeout_minutes }}
if: ( inputs.platform == 'amd64' && inputs.run_single_gpu_tests == true ) # We only run GPU tests on amd64
if: inputs.run_single_gpu_tests && matrix.arch == 'amd64'
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
pytest_marks: ${{ inputs.single_gpu_test_markers }}
framework: ${{ inputs.framework }}
test_type: "pre_merge_gpu"
platform_arch: ${{ inputs.platform }}
platform_arch: ${{ matrix.arch }}
hf_token: ${{ secrets.HF_TOKEN }}
parallel_mode: 'none'
dind_as_sidecar: 'true'
......@@ -301,12 +319,10 @@ jobs:
multi-gpu-test:
# Multi-GPU support limited to AMD64 only
if: |
!inputs.build_only &&
inputs.run_multi_gpu_tests &&
inputs.build_image &&
( inputs.platform != 'arm64' )
inputs.build_image &&
contains(inputs.platform, 'amd64')
needs: [build]
name: Multi-gpu test cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
name: Multi-gpu test cuda${{ inputs.cuda_version }}
runs-on: prod-tester-amd-gpu-4-v1
env:
FRAMEWORK: ${{ inputs.framework }}
......@@ -320,9 +336,9 @@ jobs:
CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }}
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }}
TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}
echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT
- name: Docker Login
uses: ./.github/actions/docker-login
......@@ -352,7 +368,7 @@ jobs:
pytest_marks: ${{ inputs.multi_gpu_test_markers }}
framework: ${{ inputs.framework }}
test_type: "pre_merge_gpu"
platform_arch: ${{ inputs.platform }}
platform_arch: amd64
hf_token: ${{ secrets.HF_TOKEN }}
parallel_mode: 'none'
dind_as_sidecar: 'true'
......@@ -364,7 +380,11 @@ jobs:
compliance:
if: inputs.build_image && inputs.push_image
needs: [build]
name: Compliance cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
strategy:
fail-fast: false
matrix:
arch: ${{ fromJson(needs.build.outputs.compliance_arches) }}
name: Compliance cuda${{ inputs.cuda_version }}-${{ matrix.arch }}
runs-on: prod-builder-v3
steps:
- name: Checkout repository
......@@ -377,21 +397,24 @@ jobs:
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Calculate image URI
- name: Calculate image URI and arch suffix
id: images
shell: bash
run: |
CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
echo "cuda_major=${CUDA_VERSION}" >> $GITHUB_OUTPUT
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }}
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
# Sanitize arch for artifact name: linux/amd64 -> amd64 (artifact names can't contain /)
ARCH="${{ matrix.arch }}"
echo "arch_suffix=${ARCH#linux/}" >> $GITHUB_OUTPUT
- name: Compliance scan
uses: ./.github/actions/compliance-scan
with:
image: ${{ steps.images.outputs.runtime_image }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-cuda${{ steps.images.outputs.cuda_major }}-${{ inputs.platform }}
arch: ${{ inputs.platform }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-cuda${{ steps.images.outputs.cuda_major }}-${{ steps.images.outputs.arch_suffix }}
arch: ${{ matrix.arch }}
framework: ${{ inputs.framework }}
cuda_version: ${{ inputs.cuda_version }}
......@@ -404,11 +427,10 @@ jobs:
# Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped)
if: |
always() &&
!inputs.build_only &&
inputs.copy_to_acr &&
needs.build.result == 'success' &&
(needs.test.result == 'success' || needs.test.result == 'skipped')
name: copy-to-acr cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
name: copy-to-acr cuda${{ inputs.cuda_version }}
runs-on: prod-default-small-v2
outputs:
target_tag_plain: ${{ needs.build.outputs.target_tag_plain }}
......@@ -430,12 +452,13 @@ jobs:
with:
source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
source_image: ai-dynamo/dynamo
source_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}-${{ inputs.platform }}
source_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_image: ai-dynamo/dynamo
target_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}-${{ inputs.platform }}
target_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
override_arch: amd64 # We are using AMD64 images only on the rest of the clusters.
......@@ -74,7 +74,7 @@ jobs:
with:
builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general
arch: amd64
arch: linux/amd64
- name: Docker Login
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
uses: ./.github/actions/docker-login
......
......@@ -24,7 +24,7 @@ jobs:
framework: vllm
target: runtime
no_cache: true
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
extra_tags: |
${{ github.ref_name == 'main' && 'main-vllm' || '' }}
......@@ -47,7 +47,7 @@ jobs:
framework: sglang
target: runtime
no_cache: true
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
extra_tags: |
${{ github.ref_name == 'main' && 'main-sglang' || '' }}
......@@ -70,7 +70,7 @@ jobs:
framework: trtllm
target: runtime
no_cache: true
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]'
extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
......
......@@ -25,7 +25,7 @@ jobs:
with:
framework: vllm
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
extra_tags: |
${{ github.ref_name == 'main' && 'main-vllm' || '' }}
......@@ -50,7 +50,7 @@ jobs:
with:
framework: sglang
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
extra_tags: |
${{ github.ref_name == 'main' && 'main-sglang' || '' }}
......@@ -75,7 +75,7 @@ jobs:
with:
framework: trtllm
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]'
extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
......@@ -96,11 +96,11 @@ jobs:
# ============================================================================
vllm-dev-pipeline:
name: vllm-dev
uses: ./.github/workflows/build-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: vllm
target: dev
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 60
......@@ -111,11 +111,11 @@ jobs:
sglang-dev-pipeline:
name: sglang-dev
uses: ./.github/workflows/build-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: sglang
target: dev
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 60
......@@ -126,11 +126,11 @@ jobs:
trtllm-dev-pipeline:
name: trtllm-dev
uses: ./.github/workflows/build-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: trtllm
target: dev
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]'
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 60
......@@ -150,7 +150,7 @@ jobs:
with:
framework: vllm
target: runtime
platforms: '["amd64"]'
platform: 'linux/amd64'
cuda_versions: '["12.9"]'
make_efa: true
extra_tags: |
......@@ -174,7 +174,7 @@ jobs:
with:
framework: trtllm
target: runtime
platforms: '["amd64"]'
platform: 'linux/amd64'
cuda_versions: '["13.1"]'
make_efa: true
extra_tags: |
......@@ -211,7 +211,7 @@ jobs:
with:
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
flavor: general
all_arch: 'true'
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
with:
......@@ -322,7 +322,7 @@ jobs:
with:
framework: vllm
profiles: '["agg", "agg_router", "disagg", "disagg_router"]'
image_suffix: vllm-runtime-cuda12-amd64
image_suffix: vllm-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......@@ -334,7 +334,7 @@ jobs:
with:
framework: sglang
profiles: '["agg", "agg_router"]'
image_suffix: sglang-runtime-cuda12-amd64
image_suffix: sglang-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......@@ -346,7 +346,7 @@ jobs:
with:
framework: trtllm
profiles: '["agg", "agg_router"]'
image_suffix: trtllm-runtime-cuda13-amd64
image_suffix: trtllm-runtime-cuda13
namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......
......@@ -99,7 +99,7 @@ jobs:
with:
builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general
all_arch: 'true'
arch: 'linux/amd64,linux/arm64'
- name: Docker Login
uses: ./.github/actions/docker-login
with:
......@@ -187,7 +187,7 @@ jobs:
with:
framework: vllm
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
......@@ -213,7 +213,7 @@ jobs:
with:
framework: sglang
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
......@@ -238,7 +238,7 @@ jobs:
with:
framework: trtllm
target: runtime
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
......@@ -259,11 +259,11 @@ jobs:
name: vllm-dev
needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true'
uses: ./.github/workflows/build-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: vllm
target: dev
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
......@@ -274,11 +274,11 @@ jobs:
name: sglang-dev
needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true'
uses: ./.github/workflows/build-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: sglang
target: dev
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
......@@ -289,11 +289,11 @@ jobs:
name: trtllm-dev
needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true'
uses: ./.github/workflows/build-flavor-matrix.yml
uses: ./.github/workflows/build-flavor.yml
with:
framework: trtllm
target: dev
platforms: '["amd64", "arm64"]'
platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
......@@ -343,7 +343,7 @@ jobs:
with:
framework: vllm
profiles: '["agg", "agg_router", "disagg", "disagg_router"]'
image_suffix: vllm-runtime-cuda12-amd64
image_suffix: vllm-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......@@ -360,7 +360,7 @@ jobs:
with:
framework: sglang
profiles: '["agg", "agg_router"]'
image_suffix: sglang-runtime-cuda12-amd64
image_suffix: sglang-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......@@ -377,7 +377,7 @@ jobs:
with:
framework: trtllm
profiles: '["agg", "agg_router"]'
image_suffix: trtllm-runtime-cuda13-amd64
image_suffix: trtllm-runtime-cuda13
namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......
......@@ -189,10 +189,14 @@ jobs:
echo "========================================"
copy_image() {
local SRC="$1" DST="$2" LABEL="$3"
local SRC="$1" DST="$2" LABEL="$3" PLATFORM="${4:-}"
local PLATFORM_ARG=""
if [ -n "${PLATFORM}" ]; then
PLATFORM_ARG="--platform ${PLATFORM}"
fi
echo "----------------------------------------"
echo "Copying: ${LABEL}"
if crane copy "${SRC}" "${DST}"; then
if crane copy ${PLATFORM_ARG} "${SRC}" "${DST}"; then
echo " Copied: ${LABEL}"
SUCCESSFUL_COPIES+=("${LABEL}")
return 0
......@@ -223,9 +227,9 @@ jobs:
for FRAMEWORK in "${CUDA12_FRAMEWORKS[@]}"; do
NGC_NAME="${FRAMEWORK}-runtime"
for ARCH in "${ARCHITECTURES[@]}"; do
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda12-${ARCH}"
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda12"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}"
copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}"
copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}" "linux/${ARCH}"
done
create_manifest \
......@@ -247,9 +251,9 @@ jobs:
fi
for ARCH in "${ARCHITECTURES[@]}"; do
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda13-${ARCH}"
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda13"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}"
copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}"
copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}" "linux/${ARCH}"
done
create_manifest \
......@@ -264,12 +268,12 @@ jobs:
echo "=== EFA Runtime Images ==="
# vllm EFA (CUDA 12, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-vllm-runtime-efa-cuda12-amd64"
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-vllm-runtime-efa-cuda12"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/vllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "vllm-runtime:${NGC_VERSION_TAG}-efa"
# trtllm EFA (CUDA 13, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-trtllm-runtime-efa-cuda13-amd64"
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-trtllm-runtime-efa-cuda13"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment