Unverified commit 20eee509, authored by Ran Rubin and committed by GitHub
Browse files

ci: convert framework image pipelines to single multi-arch manifest builds (#7399)


Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
parent b2aefc53
...@@ -8,7 +8,7 @@ inputs: ...@@ -8,7 +8,7 @@ inputs:
description: 'Target stage for Docker rendering' description: 'Target stage for Docker rendering'
required: true required: true
platform: platform:
description: 'Platform to build (amd64 or arm64)' description: 'Docker platform string (e.g. linux/amd64, linux/amd64,linux/arm64)'
required: true required: true
cuda_version: cuda_version:
description: 'CUDA version to build (e.g., 12.9, 13.0)' description: 'CUDA version to build (e.g., 12.9, 13.0)'
...@@ -68,15 +68,7 @@ inputs: ...@@ -68,15 +68,7 @@ inputs:
required: false required: false
default: 'false' default: 'false'
extra_tags: extra_tags:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)' description: 'Additional tags (newline-separated)'
required: false
default: ''
build_only:
description: 'Build and push only controls extra tag logic'
required: false
default: 'false'
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images'
required: false required: false
default: '' default: ''
show_summary: show_summary:
...@@ -121,8 +113,8 @@ runs: ...@@ -121,8 +113,8 @@ runs:
fi fi
TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}" TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}"
TEST_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}-test" TEST_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}-test"
DEFAULT_TARGET_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}" DEFAULT_TARGET_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}"
TEST_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}" TEST_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}-cuda${CUDA_VERSION}"
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "test_image_uri=${TEST_IMAGE_URI}" >> $GITHUB_OUTPUT echo "test_image_uri=${TEST_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
...@@ -135,7 +127,7 @@ runs: ...@@ -135,7 +127,7 @@ runs:
flavor: ${{ inputs.framework }} flavor: ${{ inputs.framework }}
arch: ${{ inputs.platform }} arch: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
- name: Calculate extra tags with platform suffix - name: Calculate extra tags
id: extra-tags id: extra-tags
shell: bash shell: bash
env: env:
...@@ -149,20 +141,10 @@ runs: ...@@ -149,20 +141,10 @@ runs:
if [ -n "$EXTRA_TAGS" ]; then if [ -n "$EXTRA_TAGS" ]; then
while IFS= read -r tag; do while IFS= read -r tag; do
if [ -n "$tag" ]; then if [ -n "$tag" ]; then
RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n' RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}-cuda${CUDA_VERSION_MAJOR}"$'\n'
fi fi
done <<< "$EXTRA_TAGS" done <<< "$EXTRA_TAGS"
fi fi
if [ "${{ inputs.build_only }}" == "true" ]; then
if [ -z "${{ inputs.sanitized_ref_name }}" ]; then
echo "::warning::sanitized_ref_name is empty but build_only is true; skipping branch tags"
else
BRANCH_TAG="${{ inputs.sanitized_ref_name }}-${{ inputs.framework }}"
RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${BRANCH_TAG}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
RESULT+="${ACR_REGISTRY}/ai-dynamo/dynamo:${BRANCH_TAG}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
RESULT+="${ACR_REGISTRY}/ai-dynamo/dynamo:${{ steps.calculate-target-tag.outputs.target_tag_plain }}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
fi
fi
if [ -n "$RESULT" ]; then if [ -n "$RESULT" ]; then
echo "tags<<EOF" >> $GITHUB_OUTPUT echo "tags<<EOF" >> $GITHUB_OUTPUT
echo "$RESULT" >> $GITHUB_OUTPUT echo "$RESULT" >> $GITHUB_OUTPUT
...@@ -236,7 +218,7 @@ runs: ...@@ -236,7 +218,7 @@ runs:
ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
run: | run: |
CUDA_MAJOR=${{ steps.calculate-target-tag.outputs.cuda_version_plain }} CUDA_MAJOR=${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
CACHE_TAG="test-${{ inputs.framework }}-cuda${CUDA_MAJOR}-${{ inputs.platform }}-cache" CACHE_TAG="test-${{ inputs.framework }}-cuda${CUDA_MAJOR}-cache"
CACHE_ARGS="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${CACHE_TAG}" CACHE_ARGS="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${CACHE_TAG}"
CACHE_ARGS+=" --cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${CACHE_TAG}" CACHE_ARGS+=" --cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${CACHE_TAG}"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then if [[ "$GITHUB_REF_NAME" == "main" ]]; then
...@@ -261,7 +243,7 @@ runs: ...@@ -261,7 +243,7 @@ runs:
--progress=plain \ --progress=plain \
${PUSH_ARGS} \ ${PUSH_ARGS} \
${NO_CACHE_ARG} \ ${NO_CACHE_ARG} \
--platform linux/${{ inputs.platform }} \ --platform ${{ inputs.platform }} \
-f container/Dockerfile.test \ -f container/Dockerfile.test \
--build-arg BASE_IMAGE=${{ steps.calculate-target-tag.outputs.default_target_image_uri }} \ --build-arg BASE_IMAGE=${{ steps.calculate-target-tag.outputs.default_target_image_uri }} \
${CACHE_ARGS} \ ${CACHE_ARGS} \
...@@ -270,7 +252,7 @@ runs: ...@@ -270,7 +252,7 @@ runs:
shell: bash shell: bash
if: ${{ inputs.push_image == 'true' && inputs.show_summary == 'true' }} if: ${{ inputs.push_image == 'true' && inputs.show_summary == 'true' }}
run: | run: |
echo "### 🐳 ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }} Default Image" >> $GITHUB_STEP_SUMMARY echo "### 🐳 ${{ inputs.framework }}-cuda${{ inputs.cuda_version }} Default Image" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY
echo "| Image URI |" >> $GITHUB_STEP_SUMMARY echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
echo "|-----|" >> $GITHUB_STEP_SUMMARY echo "|-----|" >> $GITHUB_STEP_SUMMARY
......
...@@ -22,17 +22,13 @@ inputs: ...@@ -22,17 +22,13 @@ inputs:
description: 'Buildkit flavor used to route workers on re-init (vllm, sglang, trtllm, general)' description: 'Buildkit flavor used to route workers on re-init (vllm, sglang, trtllm, general)'
required: true required: true
arch: arch:
description: 'Target architecture used on re-init (amd64, arm64)' description: 'Docker platform string used on re-init (e.g. linux/amd64, linux/arm64, linux/amd64,linux/arm64)'
required: false required: false
default: 'amd64' default: 'linux/amd64'
cuda_version: cuda_version:
description: 'CUDA version used on re-init (e.g. 12.9, 13.0). Leave empty for general flavor.' description: 'CUDA version used on re-init (e.g. 12.9, 13.0). Leave empty for general flavor.'
required: false required: false
default: '' default: ''
all_arch:
description: 'If true, re-initialize builder for both amd64 and arm64 architectures'
required: false
default: 'false'
# Kubernetes fallback passthrough inputs (forwarded to init-dynamo-builder) # Kubernetes fallback passthrough inputs (forwarded to init-dynamo-builder)
ephemeral_storage: ephemeral_storage:
...@@ -90,7 +86,6 @@ runs: ...@@ -90,7 +86,6 @@ runs:
builder_name: ${{ inputs.builder_name }} builder_name: ${{ inputs.builder_name }}
flavor: ${{ inputs.flavor }} flavor: ${{ inputs.flavor }}
arch: ${{ inputs.arch }} arch: ${{ inputs.arch }}
all_arch: ${{ inputs.all_arch }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
ephemeral_storage: ${{ inputs.ephemeral_storage }} ephemeral_storage: ${{ inputs.ephemeral_storage }}
namespace: ${{ inputs.namespace }} namespace: ${{ inputs.namespace }}
......
...@@ -12,9 +12,9 @@ inputs: ...@@ -12,9 +12,9 @@ inputs:
description: 'Name for the uploaded artifact (e.g., compliance-vllm-cuda12-amd64)' description: 'Name for the uploaded artifact (e.g., compliance-vllm-cuda12-amd64)'
required: true required: true
arch: arch:
description: 'Target architecture (amd64, arm64)' description: 'Docker platform string (e.g. linux/amd64, linux/arm64)'
required: false required: false
default: 'amd64' default: 'linux/amd64'
framework: framework:
description: 'Framework name for base image resolution (vllm, sglang, trtllm, dynamo)' description: 'Framework name for base image resolution (vllm, sglang, trtllm, dynamo)'
required: false required: false
...@@ -78,7 +78,7 @@ runs: ...@@ -78,7 +78,7 @@ runs:
mkdir -p /tmp/compliance-target mkdir -p /tmp/compliance-target
docker buildx build \ docker buildx build \
--builder ${{ steps.builder.outputs.name }} \ --builder ${{ steps.builder.outputs.name }} \
--platform linux/${{ inputs.arch }} \ --platform ${{ inputs.arch }} \
--build-arg TARGET_IMAGE="${{ inputs.image }}" \ --build-arg TARGET_IMAGE="${{ inputs.image }}" \
--output "type=local,dest=/tmp/compliance-target" \ --output "type=local,dest=/tmp/compliance-target" \
--pull \ --pull \
...@@ -102,7 +102,7 @@ runs: ...@@ -102,7 +102,7 @@ runs:
mkdir -p /tmp/compliance-base mkdir -p /tmp/compliance-base
docker buildx build \ docker buildx build \
--builder ${{ steps.builder.outputs.name }} \ --builder ${{ steps.builder.outputs.name }} \
--platform linux/${{ inputs.arch }} \ --platform ${{ inputs.arch }} \
--build-arg TARGET_IMAGE="${{ steps.resolve-base.outputs.base_image }}" \ --build-arg TARGET_IMAGE="${{ steps.resolve-base.outputs.base_image }}" \
--output "type=local,dest=/tmp/compliance-base" \ --output "type=local,dest=/tmp/compliance-base" \
--pull \ --pull \
......
...@@ -11,8 +11,8 @@ description: 'Route buildkit workers and bootstrap buildx builder for dynamo bui ...@@ -11,8 +11,8 @@ description: 'Route buildkit workers and bootstrap buildx builder for dynamo bui
# 4. Falls back to Kubernetes driver if no remote workers are available # 4. Falls back to Kubernetes driver if no remote workers are available
# #
# Architecture modes: # Architecture modes:
# - Single arch: Set arch to 'amd64' or 'arm64' to initialize for one architecture # - Single arch: Set arch to 'linux/amd64' or 'linux/arm64'
# - Multi arch: Set all_arch to 'true' to initialize for both amd64 and arm64 # - Multi arch: Set arch to 'linux/amd64,linux/arm64'
# #
# Flavor routing: # Flavor routing:
# BuildKit pods are assigned to flavors based on pod index modulo 3: # BuildKit pods are assigned to flavors based on pod index modulo 3:
...@@ -26,14 +26,14 @@ description: 'Route buildkit workers and bootstrap buildx builder for dynamo bui ...@@ -26,14 +26,14 @@ description: 'Route buildkit workers and bootstrap buildx builder for dynamo bui
# with: # with:
# builder_name: my-builder # builder_name: my-builder
# flavor: general # flavor: general
# all_arch: 'true' # arch: 'linux/amd64,linux/arm64'
# #
# # Initialize for single architecture with specific flavor and CUDA version: # # Initialize for single architecture with specific flavor and CUDA version:
# - uses: ./.github/actions/init-dynamo-builder # - uses: ./.github/actions/init-dynamo-builder
# with: # with:
# builder_name: my-builder # builder_name: my-builder
# flavor: vllm # flavor: vllm
# arch: amd64 # arch: 'linux/amd64'
# cuda_version: '12.9' # cuda_version: '12.9'
inputs: inputs:
...@@ -45,13 +45,9 @@ inputs: ...@@ -45,13 +45,9 @@ inputs:
required: false required: false
default: 'general' default: 'general'
arch: arch:
description: 'Target architecture (amd64, arm64). Ignored if all_arch is true.' description: 'Docker platform string: linux/amd64, linux/arm64, or linux/amd64,linux/arm64'
required: false required: false
default: 'amd64' default: 'linux/amd64'
all_arch:
description: 'If true, initialize builder for both amd64 and arm64 architectures'
required: false
default: 'false'
cuda_version: cuda_version:
description: 'CUDA version (12.9, 13.0). Optional for general flavor.' description: 'CUDA version (12.9, 13.0). Optional for general flavor.'
required: false required: false
...@@ -99,22 +95,29 @@ runs: ...@@ -99,22 +95,29 @@ runs:
if [[ -n "${{ inputs.cuda_version }}" ]]; then if [[ -n "${{ inputs.cuda_version }}" ]]; then
CUDA_ARG="--cuda ${{ inputs.cuda_version }}" CUDA_ARG="--cuda ${{ inputs.cuda_version }}"
fi fi
if [[ "${{ inputs.all_arch }}" == "true" ]]; then # Strip linux/ prefix (e.g. linux/amd64,linux/arm64 → amd64,arm64)
echo "running with --arch all --flavor ${{ inputs.flavor }} $CUDA_ARG" ARCH="${{ inputs.arch }}"
.github/scripts/route_buildkit.sh --arch all --flavor ${{ inputs.flavor }} $CUDA_ARG ARCH="${ARCH//linux\//}"
if [[ "$ARCH" == *","* ]]; then
ROUTE_ARCH="all"
else else
echo "running with --arch ${{ inputs.arch }} --flavor ${{ inputs.flavor }} $CUDA_ARG" ROUTE_ARCH="$ARCH"
.github/scripts/route_buildkit.sh --arch ${{ inputs.arch }} --flavor ${{ inputs.flavor }} $CUDA_ARG
fi fi
echo "running with --arch ${ROUTE_ARCH} --flavor ${{ inputs.flavor }} $CUDA_ARG"
.github/scripts/route_buildkit.sh --arch ${ROUTE_ARCH} --flavor ${{ inputs.flavor }} $CUDA_ARG
- name: Prepare worker addresses and platform - name: Prepare worker addresses and platform
id: prepare id: prepare
shell: bash shell: bash
env:
AMD64_ADDRS: ${{ steps.route-buildkit.outputs[format('{0}_amd64', inputs.flavor)] }}
ARM64_ADDRS: ${{ steps.route-buildkit.outputs[format('{0}_arm64', inputs.flavor)] }}
run: | run: |
if [[ "${{ inputs.all_arch }}" == "true" ]]; then # Strip linux/ prefix (e.g. linux/amd64,linux/arm64 → amd64,arm64)
# Combine both architecture outputs for multi-arch builds ARCH="${{ inputs.arch }}"
AMD64_ADDRS="${{ steps.route-buildkit.outputs[format('{0}_amd64', inputs.flavor)] }}" ARCH="${ARCH//linux\//}"
ARM64_ADDRS="${{ steps.route-buildkit.outputs[format('{0}_arm64', inputs.flavor)] }}" if [[ "$ARCH" == *","* ]]; then
# Multi-arch: combine both worker pools
if [[ -n "$AMD64_ADDRS" && -n "$ARM64_ADDRS" ]]; then if [[ -n "$AMD64_ADDRS" && -n "$ARM64_ADDRS" ]]; then
echo "worker_addresses=${AMD64_ADDRS},${ARM64_ADDRS}" >> "$GITHUB_OUTPUT" echo "worker_addresses=${AMD64_ADDRS},${ARM64_ADDRS}" >> "$GITHUB_OUTPUT"
elif [[ -n "$AMD64_ADDRS" ]]; then elif [[ -n "$AMD64_ADDRS" ]]; then
...@@ -124,9 +127,10 @@ runs: ...@@ -124,9 +127,10 @@ runs:
else else
echo "worker_addresses=" >> "$GITHUB_OUTPUT" echo "worker_addresses=" >> "$GITHUB_OUTPUT"
fi fi
elif [[ "$ARCH" == "arm64" ]]; then
echo "worker_addresses=${ARM64_ADDRS}" >> "$GITHUB_OUTPUT"
else else
# Single architecture build echo "worker_addresses=${AMD64_ADDRS}" >> "$GITHUB_OUTPUT"
echo "worker_addresses=${{ steps.route-buildkit.outputs[format('{0}_{1}', inputs.flavor, inputs.arch)] }}" >> "$GITHUB_OUTPUT"
fi fi
- name: Bootstrap buildkit - name: Bootstrap buildkit
......
...@@ -53,6 +53,10 @@ inputs: ...@@ -53,6 +53,10 @@ inputs:
target_azure_acr_password: target_azure_acr_password:
description: 'Azure ACR password for target registry' description: 'Azure ACR password for target registry'
required: false required: false
override_arch:
description: 'Copy only the specified architecture (e.g. amd64). If empty, copies all platforms (manifest list).'
required: false
default: ''
outputs: outputs:
target_image_ref: target_image_ref:
...@@ -109,7 +113,13 @@ runs: ...@@ -109,7 +113,13 @@ runs:
RETRY_DELAY=10 RETRY_DELAY=10
for attempt in $(seq 1 $MAX_RETRIES); do for attempt in $(seq 1 $MAX_RETRIES); do
echo "Attempt ${attempt}/${MAX_RETRIES}..." echo "Attempt ${attempt}/${MAX_RETRIES}..."
if skopeo copy --all --retry-times 4 "${SOURCE_REF}" "${TARGET_REF}"; then ARCH_FLAG=""
if [ -n "${{ inputs.override_arch }}" ]; then
ARCH_FLAG="--override-arch ${{ inputs.override_arch }}"
else
ARCH_FLAG="--all"
fi
if skopeo copy ${ARCH_FLAG} --retry-times 4 "${SOURCE_REF}" "${TARGET_REF}"; then
echo "target_image_ref=${{ inputs.target_registry }}/${TARGET_IMAGE}:${TARGET_TAG}" >> $GITHUB_OUTPUT echo "target_image_ref=${{ inputs.target_registry }}/${TARGET_IMAGE}:${TARGET_TAG}" >> $GITHUB_OUTPUT
echo "✅ Image copied successfully" echo "✅ Image copied successfully"
exit 0 exit 0
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
name: Build Framework Image Matrix name: Build Framework Image
on: on:
workflow_call: workflow_call:
...@@ -14,10 +14,11 @@ on: ...@@ -14,10 +14,11 @@ on:
description: 'Target stage for Docker rendering' description: 'Target stage for Docker rendering'
required: true required: true
type: string type: string
platforms: platform:
description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])' description: 'Docker platform(s) to build (e.g. linux/amd64,linux/arm64)'
required: true required: false
type: string type: string
default: 'linux/amd64,linux/arm64'
cuda_versions: cuda_versions:
description: 'CUDA versions to build (JSON array, e.g., ["12.9", "13.0"])' description: 'CUDA versions to build (JSON array, e.g., ["12.9", "13.0"])'
required: true required: true
...@@ -27,7 +28,7 @@ on: ...@@ -27,7 +28,7 @@ on:
required: true required: true
type: string type: string
build_timeout_minutes: build_timeout_minutes:
description: 'Timeout in minutes for the build step' description: 'Timeout in minutes for the build job'
required: false required: false
type: number type: number
default: 60 default: 60
...@@ -36,13 +37,8 @@ on: ...@@ -36,13 +37,8 @@ on:
required: false required: false
type: boolean type: boolean
default: false default: false
extra_tags: copy_to_acr:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)' description: 'Also push the image to ACR'
required: false
type: string
default: ''
make_efa:
description: 'Enable AWS EFA support in the build'
required: false required: false
type: boolean type: boolean
default: false default: false
...@@ -51,31 +47,26 @@ on: ...@@ -51,31 +47,26 @@ on:
required: false required: false
type: boolean type: boolean
default: false default: false
sanitized_ref_name: extra_tags:
description: 'Sanitized git ref name for branch-tagged images' description: 'Additional tags (newline-separated)'
required: false required: false
type: string type: string
default: '' default: ''
build_only:
description: 'Build and push only enables branch-tagged images'
required: false
type: boolean
default: false
run_compliance_scan: run_compliance_scan:
description: 'Run compliance scan after build' description: 'Run compliance scan after build'
required: false required: false
type: boolean type: boolean
default: false default: false
copy_to_acr:
description: 'Copy the built image from ECR to ACR using skopeo after the build'
required: false
type: boolean
default: false
copy_timeout_minutes: copy_timeout_minutes:
description: 'Timeout in minutes for the copy to ACR step' description: 'Timeout in minutes for the copy to ACR step'
required: false required: false
type: number type: number
default: 10 default: 10
make_efa:
description: 'Enable AWS EFA support in the build'
required: false
type: boolean
default: false
secrets: secrets:
AWS_DEFAULT_REGION: AWS_DEFAULT_REGION:
required: true required: true
...@@ -98,26 +89,25 @@ on: ...@@ -98,26 +89,25 @@ on:
jobs: jobs:
build: build:
name: Build cuda${{ matrix.cuda_version }}-${{ matrix.platform }} name: Build cuda${{ matrix.cuda_version }}
runs-on: prod-builder-v3 runs-on: prod-builder-v3
timeout-minutes: ${{ inputs.build_timeout_minutes }} timeout-minutes: ${{ inputs.build_timeout_minutes }}
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
platform: ${{ fromJson(inputs.platforms) }}
cuda_version: ${{ fromJson(inputs.cuda_versions) }} cuda_version: ${{ fromJson(inputs.cuda_versions) }}
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with: with:
lfs: true lfs: true
- name: Build - name: Build Flavor
id: build id: build
uses: ./.github/actions/build-flavor uses: ./.github/actions/build-flavor
with: with:
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
target: ${{ inputs.target }} target: ${{ inputs.target }}
platform: ${{ matrix.platform }} platform: ${{ inputs.platform }}
cuda_version: ${{ matrix.cuda_version }} cuda_version: ${{ matrix.cuda_version }}
builder_name: ${{ inputs.builder_name }} builder_name: ${{ inputs.builder_name }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
...@@ -132,19 +122,9 @@ jobs: ...@@ -132,19 +122,9 @@ jobs:
build_timeout_minutes: ${{ inputs.build_timeout_minutes }} build_timeout_minutes: ${{ inputs.build_timeout_minutes }}
push_image: ${{ inputs.push_image }} push_image: ${{ inputs.push_image }}
no_cache: ${{ inputs.no_cache }} no_cache: ${{ inputs.no_cache }}
make_efa: ${{ inputs.make_efa }}
extra_tags: ${{ inputs.extra_tags }} extra_tags: ${{ inputs.extra_tags }}
sanitized_ref_name: ${{ inputs.sanitized_ref_name }}
build_only: ${{ inputs.build_only }}
show_summary: ${{ inputs.push_image }} show_summary: ${{ inputs.push_image }}
- name: Refresh BuildKit builder make_efa: ${{ inputs.make_efa }}
if: ${{ inputs.target != 'dev' }}
uses: ./.github/actions/builder-refresher
with:
builder_name: ${{ inputs.builder_name }}
flavor: ${{ inputs.framework }}
arch: ${{ matrix.platform }}
cuda_version: ${{ matrix.cuda_version }}
- name: Copy image to ACR - name: Copy image to ACR
if: inputs.copy_to_acr if: inputs.copy_to_acr
timeout-minutes: ${{ inputs.copy_timeout_minutes }} timeout-minutes: ${{ inputs.copy_timeout_minutes }}
...@@ -152,15 +132,16 @@ jobs: ...@@ -152,15 +132,16 @@ jobs:
with: with:
source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
source_image: ai-dynamo/dynamo source_image: ai-dynamo/dynamo
source_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}-${{ matrix.platform }} source_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}
target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }} target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_image: ai-dynamo/dynamo target_image: ai-dynamo/dynamo
target_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}-${{ matrix.platform }} target_tag: ${{ steps.build.outputs.target_tag_plain }}-cuda${{ steps.build.outputs.cuda_version_plain }}
source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }} target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }} target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
override_arch: amd64 # We are using AMD64 images only on the rest of the clusters.
- name: Calculate compliance image URI - name: Calculate compliance image URI
id: compliance-image id: compliance-image
if: inputs.run_compliance_scan if: inputs.run_compliance_scan
...@@ -168,20 +149,25 @@ jobs: ...@@ -168,20 +149,25 @@ jobs:
run: | run: |
CUDA_MAJOR="${{ matrix.cuda_version }}" CUDA_MAJOR="${{ matrix.cuda_version }}"
CUDA_MAJOR="${CUDA_MAJOR%%.*}" CUDA_MAJOR="${CUDA_MAJOR%%.*}"
EFA_SUFFIX="" TARGET_TAG="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}"
if [ "${{ inputs.make_efa }}" == "true" ]; then IMAGE="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG}-cuda${CUDA_MAJOR}"
EFA_SUFFIX="-efa"
fi
TARGET_TAG="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}"
IMAGE="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG}-cuda${CUDA_MAJOR}-${{ matrix.platform }}"
echo "runtime_image=${IMAGE}" >> $GITHUB_OUTPUT echo "runtime_image=${IMAGE}" >> $GITHUB_OUTPUT
echo "cuda_major=${CUDA_MAJOR}" >> $GITHUB_OUTPUT echo "cuda_major=${CUDA_MAJOR}" >> $GITHUB_OUTPUT
- name: Compliance scan - name: Compliance scan (amd64)
if: inputs.run_compliance_scan if: inputs.run_compliance_scan && contains(inputs.platform, 'amd64')
uses: ./.github/actions/compliance-scan
with:
image: ${{ steps.compliance-image.outputs.runtime_image }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}-cuda${{ steps.compliance-image.outputs.cuda_major }}-amd64
arch: linux/amd64
framework: ${{ inputs.framework }}
cuda_version: ${{ matrix.cuda_version }}
- name: Compliance scan (arm64)
if: inputs.run_compliance_scan && contains(inputs.platform, 'arm64')
uses: ./.github/actions/compliance-scan uses: ./.github/actions/compliance-scan
with: with:
image: ${{ steps.compliance-image.outputs.runtime_image }} image: ${{ steps.compliance-image.outputs.runtime_image }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-cuda${{ steps.compliance-image.outputs.cuda_major }}-${{ matrix.platform }} artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}-cuda${{ steps.compliance-image.outputs.cuda_major }}-arm64
arch: ${{ matrix.platform }} arch: linux/arm64
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
cuda_version: ${{ matrix.cuda_version }} cuda_version: ${{ matrix.cuda_version }}
...@@ -91,7 +91,7 @@ jobs: ...@@ -91,7 +91,7 @@ jobs:
with: with:
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general flavor: general
all_arch: 'true' arch: 'linux/amd64,linux/arm64'
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
...@@ -157,6 +157,7 @@ jobs: ...@@ -157,6 +157,7 @@ jobs:
echo "epp_image_uri: ${{ steps.calculate-target-tag.outputs.epp_image_uri }}" echo "epp_image_uri: ${{ steps.calculate-target-tag.outputs.epp_image_uri }}"
echo "push_image: true" echo "push_image: true"
- name: Build Frontend Container - name: Build Frontend Container
id: build-image id: build-image
timeout-minutes: 30 timeout-minutes: 30
......
...@@ -52,46 +52,49 @@ jobs: ...@@ -52,46 +52,49 @@ jobs:
vllm-pipeline: vllm-pipeline:
needs: [init] needs: [init]
if: inputs.build_vllm if: inputs.build_vllm
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: vllm framework: vllm
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.init.outputs.builder_name }} builder_name: ${{ needs.init.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
build_only: true push_image: true
sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }} run_compliance_scan: false
copy_to_acr: true
secrets: inherit secrets: inherit
sglang-pipeline: sglang-pipeline:
needs: [init] needs: [init]
if: inputs.build_sglang if: inputs.build_sglang
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: sglang framework: sglang
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.init.outputs.builder_name }} builder_name: ${{ needs.init.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
build_only: true push_image: true
sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }} run_compliance_scan: false
copy_to_acr: true
secrets: inherit secrets: inherit
trtllm-pipeline: trtllm-pipeline:
needs: [init] needs: [init]
if: inputs.build_trtllm if: inputs.build_trtllm
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: trtllm framework: trtllm
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
builder_name: ${{ needs.init.outputs.builder_name }} builder_name: ${{ needs.init.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
build_only: true push_image: true
sanitized_ref_name: ${{ needs.init.outputs.sanitized_ref_name }} run_compliance_scan: false
copy_to_acr: true
secrets: inherit secrets: inherit
# ============================================================================ # ============================================================================
...@@ -115,7 +118,7 @@ jobs: ...@@ -115,7 +118,7 @@ jobs:
with: with:
builder_name: ${{ needs.init.outputs.builder_name }} builder_name: ${{ needs.init.outputs.builder_name }}
flavor: general flavor: general
all_arch: 'true' arch: 'linux/amd64,linux/arm64'
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
......
...@@ -14,10 +14,11 @@ on: ...@@ -14,10 +14,11 @@ on:
description: 'Target stage for Docker rendering' description: 'Target stage for Docker rendering'
required: true required: true
type: string type: string
platforms: platform:
description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])' description: 'Docker platform(s) to build (e.g. linux/amd64,linux/arm64)'
required: true required: false
type: string type: string
default: 'linux/amd64,linux/arm64'
cuda_versions: cuda_versions:
description: 'CUDA versions to build (JSON array, e.g., ["12.9", "13.0"])' description: 'CUDA versions to build (JSON array, e.g., ["12.9", "13.0"])'
required: true required: true
...@@ -84,7 +85,7 @@ on: ...@@ -84,7 +85,7 @@ on:
required: true required: true
type: string type: string
extra_tags: extra_tags:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)' description: 'Additional tags (newline-separated)'
required: false required: false
type: string type: string
default: '' default: ''
...@@ -113,16 +114,6 @@ on: ...@@ -113,16 +114,6 @@ on:
required: false required: false
type: boolean type: boolean
default: false default: false
build_only:
description: 'Build and push only skip all tests, show summary'
required: false
type: boolean
default: false
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images (used with build_only)'
required: false
type: string
default: ''
secrets: secrets:
AWS_DEFAULT_REGION: AWS_DEFAULT_REGION:
required: true required: true
...@@ -148,7 +139,6 @@ jobs: ...@@ -148,7 +139,6 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
platform: ${{ fromJson(inputs.platforms) }}
cuda_version: ${{ fromJson(inputs.cuda_versions) }} cuda_version: ${{ fromJson(inputs.cuda_versions) }}
# This name weirdly shows in the checks overview, but not once you dive into # This name weirdly shows in the checks overview, but not once you dive into
# a specific workflow. Keeping it as a short placeholder # a specific workflow. Keeping it as a short placeholder
...@@ -156,13 +146,13 @@ jobs: ...@@ -156,13 +146,13 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor.yml uses: ./.github/workflows/build-test-distribute-flavor.yml
with: with:
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
platform: ${{ matrix.platform }} platform: ${{ inputs.platform }}
target: ${{ inputs.target }} target: ${{ inputs.target }}
cuda_version: ${{ matrix.cuda_version }} cuda_version: ${{ matrix.cuda_version }}
extra_tags: ${{ inputs.extra_tags }} extra_tags: ${{ inputs.extra_tags }}
no_cache: ${{ inputs.no_cache }} no_cache: ${{ inputs.no_cache }}
builder_name: ${{ inputs.builder_name }} builder_name: ${{ inputs.builder_name }}
build_image: ${{ inputs.build_only || inputs.build_image }} build_image: ${{ inputs.build_image }}
build_timeout_minutes: ${{ inputs.build_timeout_minutes }} build_timeout_minutes: ${{ inputs.build_timeout_minutes }}
push_image: ${{ inputs.push_image }} push_image: ${{ inputs.push_image }}
run_cpu_only_tests: ${{ inputs.run_cpu_only_tests }} run_cpu_only_tests: ${{ inputs.run_cpu_only_tests }}
...@@ -174,10 +164,8 @@ jobs: ...@@ -174,10 +164,8 @@ jobs:
run_multi_gpu_tests: ${{ inputs.run_multi_gpu_tests }} run_multi_gpu_tests: ${{ inputs.run_multi_gpu_tests }}
multi_gpu_test_markers: ${{ inputs.multi_gpu_test_markers }} multi_gpu_test_markers: ${{ inputs.multi_gpu_test_markers }}
multi_gpu_test_timeout_minutes: ${{ inputs.multi_gpu_test_timeout_minutes }} multi_gpu_test_timeout_minutes: ${{ inputs.multi_gpu_test_timeout_minutes }}
copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR copy_to_acr: ${{ inputs.copy_to_acr }}
copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }} copy_timeout_minutes: ${{ inputs.copy_timeout_minutes }}
make_efa: ${{ inputs.make_efa }} make_efa: ${{ inputs.make_efa }}
show_summary: ${{ inputs.build_only || inputs.show_summary }} show_summary: ${{ inputs.show_summary }}
build_only: ${{ inputs.build_only }}
sanitized_ref_name: ${{ inputs.sanitized_ref_name }}
secrets: inherit secrets: inherit
...@@ -15,7 +15,7 @@ on: ...@@ -15,7 +15,7 @@ on:
required: true required: true
type: string type: string
platform: platform:
description: 'Platform to build (amd64 or arm64)' description: 'Docker platform(s) to build (e.g. linux/amd64,linux/arm64)'
required: true required: true
type: string type: string
cuda_version: cuda_version:
...@@ -84,7 +84,7 @@ on: ...@@ -84,7 +84,7 @@ on:
required: true required: true
type: string type: string
extra_tags: extra_tags:
description: 'Additional tags (newline-separated, -$platform suffix auto-appended)' description: 'Additional tags (newline-separated)'
required: false required: false
type: string type: string
default: '' default: ''
...@@ -118,16 +118,6 @@ on: ...@@ -118,16 +118,6 @@ on:
required: false required: false
type: boolean type: boolean
default: false default: false
build_only:
description: 'Build and push only skip all tests, show summary'
required: false
type: boolean
default: false
sanitized_ref_name:
description: 'Sanitized git ref name for branch-tagged images (used with build_only)'
required: false
type: string
default: ''
secrets: secrets:
AWS_DEFAULT_REGION: AWS_DEFAULT_REGION:
required: true required: true
...@@ -153,12 +143,14 @@ jobs: ...@@ -153,12 +143,14 @@ jobs:
# ============================================================================ # ============================================================================
build: build:
if: inputs.build_image if: inputs.build_image
name: Build cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: Build cuda${{ inputs.cuda_version }}
runs-on: prod-builder-v3 runs-on: prod-builder-v3
timeout-minutes: ${{ inputs.build_timeout_minutes }} timeout-minutes: ${{ inputs.build_timeout_minutes }}
outputs: outputs:
target_tag_plain: ${{ steps.build.outputs.target_tag_plain }} target_tag_plain: ${{ steps.build.outputs.target_tag_plain }}
test_tag_plain: ${{ steps.build.outputs.test_tag_plain }} test_tag_plain: ${{ steps.build.outputs.test_tag_plain }}
compliance_arches: ${{ steps.compliance-arches.outputs.arches }}
test_runners: ${{ steps.test-runners.outputs.runners }}
env: env:
FRAMEWORK: ${{ inputs.framework }} FRAMEWORK: ${{ inputs.framework }}
steps: steps:
...@@ -166,6 +158,32 @@ jobs: ...@@ -166,6 +158,32 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with: with:
lfs: true lfs: true
- name: Compute compliance arches
  id: compliance-arches
  shell: bash
  env:
    # Hand the workflow input to the script through the environment rather than
    # expanding it into the script text, so its value is always treated as data
    # and can never be parsed as shell syntax.
    PLATFORM: ${{ inputs.platform }}
  run: |
    # Convert the comma-separated Docker platform string to a compact JSON array:
    #   "linux/amd64,linux/arm64" -> ["linux/amd64","linux/arm64"]
    #   "linux/amd64"             -> ["linux/amd64"]
    # Whitespace is stripped up front and empty entries are dropped, so stray
    # spaces or a trailing comma do not produce bogus array elements.
    # jq does the splitting (no unquoted shell expansion, hence no globbing).
    JSON=$(jq -nc --arg platforms "${PLATFORM//[[:space:]]/}" \
      '$platforms | split(",") | map(select(length > 0))')
    # Published as the step output "arches".
    echo "arches=${JSON}" >> "$GITHUB_OUTPUT"
- name: Compute test runners
  id: test-runners
  shell: bash
  env:
    # Hand the workflow input to the script through the environment rather than
    # expanding it into the script text, so its value is always treated as data.
    PLATFORM: ${{ inputs.platform }}
  run: |
    # Emit a JSON array of {arch, runner} objects for the platforms being built:
    #   "linux/amd64,linux/arm64" -> both runners
    #   "linux/amd64"             -> amd64 runner only (covers EFA and other single-arch)
    #   "linux/arm64"             -> arm64 runner only
    AMD64_RUNNER='{"arch":"amd64","runner":"prod-tester-amd-gpu-v1"}'
    ARM64_RUNNER='{"arch":"arm64","runner":"prod-tester-arm-v1"}'

    if [[ "$PLATFORM" == *amd64* && "$PLATFORM" == *arm64* ]]; then
      RUNNERS="[${AMD64_RUNNER},${ARM64_RUNNER}]"
    elif [[ "$PLATFORM" == *arm64* ]]; then
      RUNNERS="[${ARM64_RUNNER}]"
    else
      # Fallback keeps the historical default (amd64), but make it visible when
      # the platform string names neither known architecture, e.g. a typo.
      if [[ "$PLATFORM" != *amd64* ]]; then
        echo "::warning::Unrecognized platform '${PLATFORM}'; defaulting tests to the amd64 runner"
      fi
      RUNNERS="[${AMD64_RUNNER}]"
    fi
    # Published as the step output "runners".
    echo "runners=${RUNNERS}" >> "$GITHUB_OUTPUT"
- name: Build - name: Build
id: build id: build
uses: ./.github/actions/build-flavor uses: ./.github/actions/build-flavor
...@@ -190,23 +208,23 @@ jobs: ...@@ -190,23 +208,23 @@ jobs:
no_cache: ${{ inputs.no_cache }} no_cache: ${{ inputs.no_cache }}
make_efa: ${{ inputs.make_efa }} make_efa: ${{ inputs.make_efa }}
extra_tags: ${{ inputs.extra_tags }} extra_tags: ${{ inputs.extra_tags }}
build_only: ${{ inputs.build_only }}
sanitized_ref_name: ${{ inputs.sanitized_ref_name }}
show_summary: ${{ inputs.push_image && inputs.show_summary }} show_summary: ${{ inputs.push_image && inputs.show_summary }}
# ============================================================================ # ============================================================================
# TEST # TEST
# ============================================================================ # ============================================================================
test: test:
if: | if: |
!inputs.build_only &&
( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) && ( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) &&
inputs.build_image inputs.build_image
needs: [build] needs: [build]
name: Test cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: Test cuda${{ inputs.cuda_version }} (${{ matrix.arch }})
runs-on: ${{ inputs.platform == 'amd64' && 'prod-tester-amd-gpu-v1' || 'prod-tester-arm-v1' }} strategy:
fail-fast: false
matrix:
include: ${{ fromJson(needs.build.outputs.test_runners) }}
runs-on: ${{ matrix.runner }}
env: env:
FRAMEWORK: ${{ inputs.framework }} FRAMEWORK: ${{ inputs.framework }}
steps: steps:
...@@ -219,9 +237,9 @@ jobs: ...@@ -219,9 +237,9 @@ jobs:
CUDA_VERSION_RAW=${{ inputs.cuda_version }} CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*} CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }} RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }} TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}
echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
...@@ -273,7 +291,7 @@ jobs: ...@@ -273,7 +291,7 @@ jobs:
pytest_marks: ${{ inputs.cpu_only_test_markers }} pytest_marks: ${{ inputs.cpu_only_test_markers }}
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
test_type: "pre_merge_cpu" test_type: "pre_merge_cpu"
platform_arch: ${{ inputs.platform }} platform_arch: ${{ matrix.arch }}
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
parallel_mode: 'auto' parallel_mode: 'auto'
dind_as_sidecar: 'true' dind_as_sidecar: 'true'
...@@ -282,14 +300,14 @@ jobs: ...@@ -282,14 +300,14 @@ jobs:
# These are e2e tests marked with gpu_1 that require GPU hardware # These are e2e tests marked with gpu_1 that require GPU hardware
- name: Run GPU tests (sequential) - name: Run GPU tests (sequential)
timeout-minutes: ${{ inputs.single_gpu_test_timeout_minutes }} timeout-minutes: ${{ inputs.single_gpu_test_timeout_minutes }}
if: ( inputs.platform == 'amd64' && inputs.run_single_gpu_tests == true ) # We only run GPU tests on amd64 if: inputs.run_single_gpu_tests && matrix.arch == 'amd64'
uses: ./.github/actions/pytest uses: ./.github/actions/pytest
with: with:
image_tag: ${{ steps.calculate-target-tag.outputs.test_image }} image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
pytest_marks: ${{ inputs.single_gpu_test_markers }} pytest_marks: ${{ inputs.single_gpu_test_markers }}
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
test_type: "pre_merge_gpu" test_type: "pre_merge_gpu"
platform_arch: ${{ inputs.platform }} platform_arch: ${{ matrix.arch }}
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
parallel_mode: 'none' parallel_mode: 'none'
dind_as_sidecar: 'true' dind_as_sidecar: 'true'
...@@ -301,12 +319,10 @@ jobs: ...@@ -301,12 +319,10 @@ jobs:
multi-gpu-test: multi-gpu-test:
# Multi-GPU support limited to AMD64 only # Multi-GPU support limited to AMD64 only
if: | if: |
!inputs.build_only &&
inputs.run_multi_gpu_tests && inputs.run_multi_gpu_tests &&
inputs.build_image && inputs.build_image
( inputs.platform != 'arm64' )
needs: [build] needs: [build]
name: Multi-gpu test cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: Multi-gpu test cuda${{ inputs.cuda_version }}
runs-on: prod-tester-amd-gpu-4-v1 runs-on: prod-tester-amd-gpu-4-v1
env: env:
FRAMEWORK: ${{ inputs.framework }} FRAMEWORK: ${{ inputs.framework }}
...@@ -320,9 +336,9 @@ jobs: ...@@ -320,9 +336,9 @@ jobs:
CUDA_VERSION_RAW=${{ inputs.cuda_version }} CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*} CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }} RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }} TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}
echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
...@@ -352,7 +368,7 @@ jobs: ...@@ -352,7 +368,7 @@ jobs:
pytest_marks: ${{ inputs.multi_gpu_test_markers }} pytest_marks: ${{ inputs.multi_gpu_test_markers }}
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
test_type: "pre_merge_gpu" test_type: "pre_merge_gpu"
platform_arch: ${{ inputs.platform }} platform_arch: amd64
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
parallel_mode: 'none' parallel_mode: 'none'
dind_as_sidecar: 'true' dind_as_sidecar: 'true'
...@@ -364,7 +380,11 @@ jobs: ...@@ -364,7 +380,11 @@ jobs:
compliance: compliance:
if: inputs.build_image && inputs.push_image if: inputs.build_image && inputs.push_image
needs: [build] needs: [build]
name: Compliance cuda${{ inputs.cuda_version }}-${{ inputs.platform }} strategy:
fail-fast: false
matrix:
arch: ${{ fromJson(needs.build.outputs.compliance_arches) }}
name: Compliance cuda${{ inputs.cuda_version }}-${{ matrix.arch }}
runs-on: prod-builder-v3 runs-on: prod-builder-v3
steps: steps:
- name: Checkout repository - name: Checkout repository
...@@ -377,21 +397,24 @@ jobs: ...@@ -377,21 +397,24 @@ jobs:
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }} azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }} azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Calculate image URI - name: Calculate image URI and arch suffix
id: images id: images
shell: bash shell: bash
run: | run: |
CUDA_VERSION_RAW=${{ inputs.cuda_version }} CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*} CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
echo "cuda_major=${CUDA_VERSION}" >> $GITHUB_OUTPUT echo "cuda_major=${CUDA_VERSION}" >> $GITHUB_OUTPUT
RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }} RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
# Sanitize arch for artifact name: linux/amd64 -> amd64 (artifact names can't contain /)
ARCH="${{ matrix.arch }}"
echo "arch_suffix=${ARCH#linux/}" >> $GITHUB_OUTPUT
- name: Compliance scan - name: Compliance scan
uses: ./.github/actions/compliance-scan uses: ./.github/actions/compliance-scan
with: with:
image: ${{ steps.images.outputs.runtime_image }} image: ${{ steps.images.outputs.runtime_image }}
artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-cuda${{ steps.images.outputs.cuda_major }}-${{ inputs.platform }} artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-cuda${{ steps.images.outputs.cuda_major }}-${{ steps.images.outputs.arch_suffix }}
arch: ${{ inputs.platform }} arch: ${{ matrix.arch }}
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
...@@ -404,11 +427,10 @@ jobs: ...@@ -404,11 +427,10 @@ jobs:
# Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped) # Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped)
if: | if: |
always() && always() &&
!inputs.build_only &&
inputs.copy_to_acr && inputs.copy_to_acr &&
needs.build.result == 'success' && needs.build.result == 'success' &&
(needs.test.result == 'success' || needs.test.result == 'skipped') (needs.test.result == 'success' || needs.test.result == 'skipped')
name: copy-to-acr cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: copy-to-acr cuda${{ inputs.cuda_version }}
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
outputs: outputs:
target_tag_plain: ${{ needs.build.outputs.target_tag_plain }} target_tag_plain: ${{ needs.build.outputs.target_tag_plain }}
...@@ -430,12 +452,13 @@ jobs: ...@@ -430,12 +452,13 @@ jobs:
with: with:
source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
source_image: ai-dynamo/dynamo source_image: ai-dynamo/dynamo
source_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}-${{ inputs.platform }} source_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }} target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_image: ai-dynamo/dynamo target_image: ai-dynamo/dynamo
target_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}-${{ inputs.platform }} target_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }} target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }} target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
override_arch: amd64 # We are using AMD64 images only on the rest of the clusters.
...@@ -74,7 +74,7 @@ jobs: ...@@ -74,7 +74,7 @@ jobs:
with: with:
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general flavor: general
arch: amd64 arch: linux/amd64
- name: Docker Login - name: Docker Login
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push' if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
......
...@@ -24,7 +24,7 @@ jobs: ...@@ -24,7 +24,7 @@ jobs:
framework: vllm framework: vllm
target: runtime target: runtime
no_cache: true no_cache: true
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-vllm' || '' }} ${{ github.ref_name == 'main' && 'main-vllm' || '' }}
...@@ -47,7 +47,7 @@ jobs: ...@@ -47,7 +47,7 @@ jobs:
framework: sglang framework: sglang
target: runtime target: runtime
no_cache: true no_cache: true
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-sglang' || '' }} ${{ github.ref_name == 'main' && 'main-sglang' || '' }}
...@@ -70,7 +70,7 @@ jobs: ...@@ -70,7 +70,7 @@ jobs:
framework: trtllm framework: trtllm
target: runtime target: runtime
no_cache: true no_cache: true
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm' || '' }} ${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
......
...@@ -25,7 +25,7 @@ jobs: ...@@ -25,7 +25,7 @@ jobs:
with: with:
framework: vllm framework: vllm
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-vllm' || '' }} ${{ github.ref_name == 'main' && 'main-vllm' || '' }}
...@@ -50,7 +50,7 @@ jobs: ...@@ -50,7 +50,7 @@ jobs:
with: with:
framework: sglang framework: sglang
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-sglang' || '' }} ${{ github.ref_name == 'main' && 'main-sglang' || '' }}
...@@ -75,7 +75,7 @@ jobs: ...@@ -75,7 +75,7 @@ jobs:
with: with:
framework: trtllm framework: trtllm
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm' || '' }} ${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
...@@ -96,11 +96,11 @@ jobs: ...@@ -96,11 +96,11 @@ jobs:
# ============================================================================ # ============================================================================
vllm-dev-pipeline: vllm-dev-pipeline:
name: vllm-dev name: vllm-dev
uses: ./.github/workflows/build-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: vllm framework: vllm
target: dev target: dev
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }} builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -111,11 +111,11 @@ jobs: ...@@ -111,11 +111,11 @@ jobs:
sglang-dev-pipeline: sglang-dev-pipeline:
name: sglang-dev name: sglang-dev
uses: ./.github/workflows/build-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: sglang framework: sglang
target: dev target: dev
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }} builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -126,11 +126,11 @@ jobs: ...@@ -126,11 +126,11 @@ jobs:
trtllm-dev-pipeline: trtllm-dev-pipeline:
name: trtllm-dev name: trtllm-dev
uses: ./.github/workflows/build-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: trtllm framework: trtllm
target: dev target: dev
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }} builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -150,7 +150,7 @@ jobs: ...@@ -150,7 +150,7 @@ jobs:
with: with:
framework: vllm framework: vllm
target: runtime target: runtime
platforms: '["amd64"]' platform: 'linux/amd64'
cuda_versions: '["12.9"]' cuda_versions: '["12.9"]'
make_efa: true make_efa: true
extra_tags: | extra_tags: |
...@@ -174,7 +174,7 @@ jobs: ...@@ -174,7 +174,7 @@ jobs:
with: with:
framework: trtllm framework: trtllm
target: runtime target: runtime
platforms: '["amd64"]' platform: 'linux/amd64'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
make_efa: true make_efa: true
extra_tags: | extra_tags: |
...@@ -211,7 +211,7 @@ jobs: ...@@ -211,7 +211,7 @@ jobs:
with: with:
builder_name: b-${{ github.run_id }}-${{ github.run_attempt }} builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
flavor: general flavor: general
all_arch: 'true' arch: 'linux/amd64,linux/arm64'
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
...@@ -322,7 +322,7 @@ jobs: ...@@ -322,7 +322,7 @@ jobs:
with: with:
framework: vllm framework: vllm
profiles: '["agg", "agg_router", "disagg", "disagg_router"]' profiles: '["agg", "agg_router", "disagg", "disagg_router"]'
image_suffix: vllm-runtime-cuda12-amd64 image_suffix: vllm-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }} namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }} vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }} operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
...@@ -334,7 +334,7 @@ jobs: ...@@ -334,7 +334,7 @@ jobs:
with: with:
framework: sglang framework: sglang
profiles: '["agg", "agg_router"]' profiles: '["agg", "agg_router"]'
image_suffix: sglang-runtime-cuda12-amd64 image_suffix: sglang-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }} namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }} vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }} operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
...@@ -346,7 +346,7 @@ jobs: ...@@ -346,7 +346,7 @@ jobs:
with: with:
framework: trtllm framework: trtllm
profiles: '["agg", "agg_router"]' profiles: '["agg", "agg_router"]'
image_suffix: trtllm-runtime-cuda13-amd64 image_suffix: trtllm-runtime-cuda13
namespace: ${{ needs.deploy-operator.outputs.namespace }} namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }} vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }} operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......
...@@ -99,7 +99,7 @@ jobs: ...@@ -99,7 +99,7 @@ jobs:
with: with:
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
flavor: general flavor: general
all_arch: 'true' arch: 'linux/amd64,linux/arm64'
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
...@@ -187,7 +187,7 @@ jobs: ...@@ -187,7 +187,7 @@ jobs:
with: with:
framework: vllm framework: vllm
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -213,7 +213,7 @@ jobs: ...@@ -213,7 +213,7 @@ jobs:
with: with:
framework: sglang framework: sglang
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -238,7 +238,7 @@ jobs: ...@@ -238,7 +238,7 @@ jobs:
with: with:
framework: trtllm framework: trtllm
target: runtime target: runtime
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -259,11 +259,11 @@ jobs: ...@@ -259,11 +259,11 @@ jobs:
name: vllm-dev name: vllm-dev
needs: [changed-files] needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true'
uses: ./.github/workflows/build-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: vllm framework: vllm
target: dev target: dev
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -274,11 +274,11 @@ jobs: ...@@ -274,11 +274,11 @@ jobs:
name: sglang-dev name: sglang-dev
needs: [changed-files] needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true'
uses: ./.github/workflows/build-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: sglang framework: sglang
target: dev target: dev
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -289,11 +289,11 @@ jobs: ...@@ -289,11 +289,11 @@ jobs:
name: trtllm-dev name: trtllm-dev
needs: [changed-files] needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true'
uses: ./.github/workflows/build-flavor-matrix.yml uses: ./.github/workflows/build-flavor.yml
with: with:
framework: trtllm framework: trtllm
target: dev target: dev
platforms: '["amd64", "arm64"]' platform: 'linux/amd64,linux/arm64'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60 build_timeout_minutes: 60
...@@ -343,7 +343,7 @@ jobs: ...@@ -343,7 +343,7 @@ jobs:
with: with:
framework: vllm framework: vllm
profiles: '["agg", "agg_router", "disagg", "disagg_router"]' profiles: '["agg", "agg_router", "disagg", "disagg_router"]'
image_suffix: vllm-runtime-cuda12-amd64 image_suffix: vllm-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }} namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }} vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }} operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
...@@ -360,7 +360,7 @@ jobs: ...@@ -360,7 +360,7 @@ jobs:
with: with:
framework: sglang framework: sglang
profiles: '["agg", "agg_router"]' profiles: '["agg", "agg_router"]'
image_suffix: sglang-runtime-cuda12-amd64 image_suffix: sglang-runtime-cuda12
namespace: ${{ needs.deploy-operator.outputs.namespace }} namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }} vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }} operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
...@@ -377,7 +377,7 @@ jobs: ...@@ -377,7 +377,7 @@ jobs:
with: with:
framework: trtllm framework: trtllm
profiles: '["agg", "agg_router"]' profiles: '["agg", "agg_router"]'
image_suffix: trtllm-runtime-cuda13-amd64 image_suffix: trtllm-runtime-cuda13
namespace: ${{ needs.deploy-operator.outputs.namespace }} namespace: ${{ needs.deploy-operator.outputs.namespace }}
vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }} vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }} operator_tag: ${{ needs.deploy-operator.outputs.operator_tag }}
......
...@@ -189,10 +189,14 @@ jobs: ...@@ -189,10 +189,14 @@ jobs:
echo "========================================" echo "========================================"
copy_image() { copy_image() {
local SRC="$1" DST="$2" LABEL="$3" local SRC="$1" DST="$2" LABEL="$3" PLATFORM="${4:-}"
local PLATFORM_ARG=""
if [ -n "${PLATFORM}" ]; then
PLATFORM_ARG="--platform ${PLATFORM}"
fi
echo "----------------------------------------" echo "----------------------------------------"
echo "Copying: ${LABEL}" echo "Copying: ${LABEL}"
if crane copy "${SRC}" "${DST}"; then if crane copy ${PLATFORM_ARG} "${SRC}" "${DST}"; then
echo " Copied: ${LABEL}" echo " Copied: ${LABEL}"
SUCCESSFUL_COPIES+=("${LABEL}") SUCCESSFUL_COPIES+=("${LABEL}")
return 0 return 0
...@@ -223,9 +227,9 @@ jobs: ...@@ -223,9 +227,9 @@ jobs:
for FRAMEWORK in "${CUDA12_FRAMEWORKS[@]}"; do for FRAMEWORK in "${CUDA12_FRAMEWORKS[@]}"; do
NGC_NAME="${FRAMEWORK}-runtime" NGC_NAME="${FRAMEWORK}-runtime"
for ARCH in "${ARCHITECTURES[@]}"; do for ARCH in "${ARCHITECTURES[@]}"; do
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda12-${ARCH}" SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda12"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}" TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}"
copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}" copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}" "linux/${ARCH}"
done done
create_manifest \ create_manifest \
...@@ -247,9 +251,9 @@ jobs: ...@@ -247,9 +251,9 @@ jobs:
fi fi
for ARCH in "${ARCHITECTURES[@]}"; do for ARCH in "${ARCHITECTURES[@]}"; do
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda13-${ARCH}" SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-${FRAMEWORK}-runtime-cuda13"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}" TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}"
copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}" copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}" "linux/${ARCH}"
done done
create_manifest \ create_manifest \
...@@ -264,12 +268,12 @@ jobs: ...@@ -264,12 +268,12 @@ jobs:
echo "=== EFA Runtime Images ===" echo "=== EFA Runtime Images ==="
# vllm EFA (CUDA 12, amd64 only) # vllm EFA (CUDA 12, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-vllm-runtime-efa-cuda12-amd64" SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-vllm-runtime-efa-cuda12"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/vllm-runtime:${NGC_VERSION_TAG}-efa" TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/vllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "vllm-runtime:${NGC_VERSION_TAG}-efa" copy_image "${SOURCE}" "${TARGET}" "vllm-runtime:${NGC_VERSION_TAG}-efa"
# trtllm EFA (CUDA 13, amd64 only) # trtllm EFA (CUDA 13, amd64 only)
SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-trtllm-runtime-efa-cuda13-amd64" SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${COMMIT_SHA}-trtllm-runtime-efa-cuda13"
TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/tensorrtllm-runtime:${NGC_VERSION_TAG}-efa" TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
copy_image "${SOURCE}" "${TARGET}" "tensorrtllm-runtime:${NGC_VERSION_TAG}-efa" copy_image "${SOURCE}" "${TARGET}" "tensorrtllm-runtime:${NGC_VERSION_TAG}-efa"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment