Unverified Commit 38bf037b authored by Dillon Cullinan's avatar Dillon Cullinan Committed by GitHub
Browse files

ci: OPS-2444: Enable dev builds for pre-merge and post-merge (#7174)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
parent da354663
...@@ -150,7 +150,9 @@ jobs: ...@@ -150,7 +150,9 @@ jobs:
matrix: matrix:
platform: ${{ fromJson(inputs.platforms) }} platform: ${{ fromJson(inputs.platforms) }}
cuda_version: ${{ fromJson(inputs.cuda_versions) }} cuda_version: ${{ fromJson(inputs.cuda_versions) }}
name: ${{ inputs.framework }}-cuda${{ matrix.cuda_version }}-${{ matrix.platform }} # This name weirdly shows in the checks overview, but not once you dive into
# a specific workflow. Keeping it as a short placeholder
name: matrix
uses: ./.github/workflows/build-test-distribute-flavor.yml uses: ./.github/workflows/build-test-distribute-flavor.yml
with: with:
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
......
...@@ -158,7 +158,7 @@ jobs: ...@@ -158,7 +158,7 @@ jobs:
# ============================================================================ # ============================================================================
build: build:
if: inputs.build_image if: inputs.build_image
name: Build ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: Build cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
runs-on: prod-builder-v3 runs-on: prod-builder-v3
outputs: outputs:
target_tag_plain: ${{ steps.calculate-target-tag.outputs.target_tag_plain }} target_tag_plain: ${{ steps.calculate-target-tag.outputs.target_tag_plain }}
...@@ -188,8 +188,8 @@ jobs: ...@@ -188,8 +188,8 @@ jobs:
if [ "${{ inputs.make_efa }}" == "true" ]; then if [ "${{ inputs.make_efa }}" == "true" ]; then
EFA_SUFFIX="-efa" EFA_SUFFIX="-efa"
fi fi
TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}${EFA_SUFFIX}" TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}"
TEST_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}${EFA_SUFFIX}-test" TEST_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}-test"
DEFAULT_TARGET_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}" DEFAULT_TARGET_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}"
TEST_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}" TEST_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}-cuda${CUDA_VERSION}-${{ inputs.platform }}"
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
...@@ -263,7 +263,7 @@ jobs: ...@@ -263,7 +263,7 @@ jobs:
${MAKE_EFA_FLAG} \ ${MAKE_EFA_FLAG} \
--show-result \ --show-result \
--output-short-filename --output-short-filename
- name: Build and Push Runtime Image - name: Build and Push Image
id: build-image id: build-image
timeout-minutes: ${{ inputs.build_timeout_minutes }} timeout-minutes: ${{ inputs.build_timeout_minutes }}
uses: ./.github/actions/docker-remote-build uses: ./.github/actions/docker-remote-build
...@@ -285,6 +285,7 @@ jobs: ...@@ -285,6 +285,7 @@ jobs:
extra_build_args: | extra_build_args: |
DYNAMO_COMMIT_SHA=${{ github.sha }} DYNAMO_COMMIT_SHA=${{ github.sha }}
- name: Build and Push Test Image - name: Build and Push Test Image
if: ${{ inputs.target != 'dev' }} # no need to build a separate test image for dev as it's not tested
shell: bash shell: bash
env: env:
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
...@@ -347,7 +348,7 @@ jobs: ...@@ -347,7 +348,7 @@ jobs:
( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) && ( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) &&
inputs.build_image inputs.build_image
needs: [build] needs: [build]
name: Test ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: Test cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
runs-on: ${{ inputs.platform == 'amd64' && 'prod-tester-amd-gpu-v1' || 'prod-tester-arm-v1' }} runs-on: ${{ inputs.platform == 'amd64' && 'prod-tester-amd-gpu-v1' || 'prod-tester-arm-v1' }}
env: env:
FRAMEWORK: ${{ inputs.framework }} FRAMEWORK: ${{ inputs.framework }}
...@@ -450,7 +451,7 @@ jobs: ...@@ -450,7 +451,7 @@ jobs:
inputs.build_image && inputs.build_image &&
( inputs.platform != 'arm64' ) ( inputs.platform != 'arm64' )
needs: [build] needs: [build]
name: Multi-gpu Test ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: Multi-gpu test cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
runs-on: prod-tester-amd-gpu-4-v1 runs-on: prod-tester-amd-gpu-4-v1
env: env:
FRAMEWORK: ${{ inputs.framework }} FRAMEWORK: ${{ inputs.framework }}
...@@ -515,7 +516,7 @@ jobs: ...@@ -515,7 +516,7 @@ jobs:
inputs.copy_to_acr && inputs.copy_to_acr &&
needs.build.result == 'success' && needs.build.result == 'success' &&
(needs.test.result == 'success' || needs.test.result == 'skipped') (needs.test.result == 'success' || needs.test.result == 'skipped')
name: copy ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }} name: copy-to-acr cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
outputs: outputs:
target_tag_plain: ${{ needs.build.outputs.target_tag_plain }} target_tag_plain: ${{ needs.build.outputs.target_tag_plain }}
......
...@@ -20,6 +20,7 @@ jobs: ...@@ -20,6 +20,7 @@ jobs:
# VLLM PIPELINE # VLLM PIPELINE
# ============================================================================ # ============================================================================
vllm-pipeline: vllm-pipeline:
name: vllm
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: vllm framework: vllm
...@@ -44,6 +45,7 @@ jobs: ...@@ -44,6 +45,7 @@ jobs:
# SGLANG PIPELINE # SGLANG PIPELINE
# ============================================================================ # ============================================================================
sglang-pipeline: sglang-pipeline:
name: sglang
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: sglang framework: sglang
...@@ -68,6 +70,7 @@ jobs: ...@@ -68,6 +70,7 @@ jobs:
# TRTLLM PIPELINE # TRTLLM PIPELINE
# ============================================================================ # ============================================================================
trtllm-pipeline: trtllm-pipeline:
name: trtllm
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: trtllm framework: trtllm
...@@ -88,6 +91,54 @@ jobs: ...@@ -88,6 +91,54 @@ jobs:
multi_gpu_test_timeout_minutes: 60 multi_gpu_test_timeout_minutes: 60
secrets: inherit secrets: inherit
# ============================================================================
# DEV PIPELINES
# ============================================================================
# Dev-image job for vLLM: invokes the reusable flavor-matrix workflow with
# target "dev" for every platform / CUDA version combination listed below.
# NOTE(review): builder_name reads needs.changed-files.outputs.builder_name,
# but this job declares no `needs:` key — confirm a changed-files dependency
# exists here, otherwise the expression presumably resolves to empty.
vllm-dev-pipeline:
name: vllm-dev
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: vllm
target: dev
# JSON-encoded lists passed as strings to the called workflow.
platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
# All three test toggles are switched off for the dev target.
run_cpu_only_tests: false
run_single_gpu_tests: false
run_multi_gpu_tests: false
# Forward the caller's secrets to the reusable workflow.
secrets: inherit
# Dev-image job for SGLang: invokes the reusable flavor-matrix workflow with
# target "dev" for every platform / CUDA version combination listed below.
# NOTE(review): builder_name reads needs.changed-files.outputs.builder_name,
# but this job declares no `needs:` key — confirm a changed-files dependency
# exists here, otherwise the expression presumably resolves to empty.
sglang-dev-pipeline:
name: sglang-dev
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: sglang
target: dev
# JSON-encoded lists passed as strings to the called workflow.
platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
# All three test toggles are switched off for the dev target.
run_cpu_only_tests: false
run_single_gpu_tests: false
run_multi_gpu_tests: false
# Forward the caller's secrets to the reusable workflow.
secrets: inherit
# Dev-image job for TensorRT-LLM: invokes the reusable flavor-matrix workflow
# with target "dev"; only a single CUDA version (13.1) is listed here.
# NOTE(review): builder_name reads needs.changed-files.outputs.builder_name,
# but this job declares no `needs:` key — confirm a changed-files dependency
# exists here, otherwise the expression presumably resolves to empty.
trtllm-dev-pipeline:
name: trtllm-dev
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: trtllm
target: dev
# JSON-encoded lists passed as strings to the called workflow.
platforms: '["amd64", "arm64"]'
cuda_versions: '["13.1"]'
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
# All three test toggles are switched off for the dev target.
run_cpu_only_tests: false
run_single_gpu_tests: false
run_multi_gpu_tests: false
# Forward the caller's secrets to the reusable workflow.
secrets: inherit
# ============================================================================ # ============================================================================
# EFA PIPELINES (Build only, amd64) # EFA PIPELINES (Build only, amd64)
# ============================================================================ # ============================================================================
...@@ -321,7 +372,7 @@ jobs: ...@@ -321,7 +372,7 @@ jobs:
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-cuda12-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-runtime-cuda12-amd64
platform_arch: amd64 platform_arch: amd64
deploy-test-sglang: deploy-test-sglang:
...@@ -354,7 +405,7 @@ jobs: ...@@ -354,7 +405,7 @@ jobs:
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-sglang-cuda12-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-sglang-runtime-cuda12-amd64
platform_arch: amd64 platform_arch: amd64
deploy-test-trtllm: deploy-test-trtllm:
...@@ -391,7 +442,7 @@ jobs: ...@@ -391,7 +442,7 @@ jobs:
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-trtllm-cuda13-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-trtllm-runtime-cuda13-amd64
platform_arch: amd64 platform_arch: amd64
deploy-status-check: deploy-status-check:
......
...@@ -180,6 +180,7 @@ jobs: ...@@ -180,6 +180,7 @@ jobs:
# VLLM PIPELINE # VLLM PIPELINE
# ============================================================================ # ============================================================================
vllm-pipeline: vllm-pipeline:
name: vllm
needs: [changed-files] needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true' if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true'
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
...@@ -203,6 +204,7 @@ jobs: ...@@ -203,6 +204,7 @@ jobs:
# SGLANG PIPELINE # SGLANG PIPELINE
# ============================================================================ # ============================================================================
sglang-pipeline: sglang-pipeline:
name: sglang
needs: [changed-files] needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true' if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true'
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
...@@ -225,6 +227,7 @@ jobs: ...@@ -225,6 +227,7 @@ jobs:
# TRTLLM PIPELINE # TRTLLM PIPELINE
# ============================================================================ # ============================================================================
trtllm-pipeline: trtllm-pipeline:
name: trtllm
needs: [changed-files] needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true' if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true'
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
...@@ -243,6 +246,65 @@ jobs: ...@@ -243,6 +246,65 @@ jobs:
run_multi_gpu_tests: false # TODO: Dmitry is working on fixing markers for multi-GPU tests, can enable after that is resolved run_multi_gpu_tests: false # TODO: Dmitry is working on fixing markers for multi-GPU tests, can enable after that is resolved
secrets: inherit secrets: inherit
# ============================================================================
# DEV PIPELINES
# ============================================================================
# Dev-image job for vLLM, gated on the changed-files job: it runs only when
# that job reports changes to the core or vllm areas. Builds are not pushed
# or copied to ACR, and every test toggle is off.
vllm-dev-pipeline:
name: vllm-dev
needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true'
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: vllm
target: dev
# JSON-encoded lists passed as strings to the called workflow.
platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]'
# Builder name is supplied by the changed-files job's outputs.
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
push_image: false # Only push dev images on main
copy_to_acr: false
run_cpu_only_tests: false
run_single_gpu_tests: false
run_multi_gpu_tests: false
# Forward the caller's secrets to the reusable workflow.
secrets: inherit
# Dev-image job for SGLang, gated on the changed-files job: it runs only when
# that job reports changes to the core or sglang areas. Builds are not pushed
# or copied to ACR, and every test toggle is off.
sglang-dev-pipeline:
name: sglang-dev
needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true'
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: sglang
target: dev
# JSON-encoded lists passed as strings to the called workflow.
platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]'
# Builder name is supplied by the changed-files job's outputs.
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
push_image: false # Only push dev images on main
copy_to_acr: false
run_cpu_only_tests: false
run_single_gpu_tests: false
run_multi_gpu_tests: false
# Forward the caller's secrets to the reusable workflow.
secrets: inherit
# Dev-image job for TensorRT-LLM, gated on the changed-files job: it runs only
# when that job reports changes to the core or trtllm areas. Builds are not
# pushed or copied to ACR, and every test toggle is off.
trtllm-dev-pipeline:
name: trtllm-dev
needs: [changed-files]
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true'
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: trtllm
target: dev
# JSON-encoded lists passed as strings; only one CUDA version is listed.
platforms: '["amd64", "arm64"]'
cuda_versions: '["13.1"]'
# Builder name is supplied by the changed-files job's outputs.
builder_name: ${{ needs.changed-files.outputs.builder_name }}
build_timeout_minutes: 60
push_image: false # Only push dev images on main
copy_to_acr: false
run_cpu_only_tests: false
run_single_gpu_tests: false
run_multi_gpu_tests: false
# Forward the caller's secrets to the reusable workflow.
secrets: inherit
# ============================================================================ # ============================================================================
# DEPLOYMENT JOBS # DEPLOYMENT JOBS
...@@ -339,7 +401,7 @@ jobs: ...@@ -339,7 +401,7 @@ jobs:
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-cuda12-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-runtime-cuda12-amd64
platform_arch: amd64 platform_arch: amd64
deploy-test-sglang: deploy-test-sglang:
...@@ -376,7 +438,7 @@ jobs: ...@@ -376,7 +438,7 @@ jobs:
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-sglang-cuda12-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-sglang-runtime-cuda12-amd64
platform_arch: amd64 platform_arch: amd64
deploy-test-trtllm: deploy-test-trtllm:
...@@ -417,7 +479,7 @@ jobs: ...@@ -417,7 +479,7 @@ jobs:
hf_token: ${{ secrets.HF_TOKEN }} hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-trtllm-cuda13-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-trtllm-runtime-cuda13-amd64
platform_arch: amd64 platform_arch: amd64
# ============================================================================ # ============================================================================
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment