Unverified Commit 7e48f3bd authored by Anant Sharma's avatar Anant Sharma Committed by GitHub
Browse files

ci: fold container-validation-dynamo into pr, post-merge, and nightly (#8525)


Signed-off-by: default avatarAnant Sharma <anants@nvidia.com>
parent dfde02c5
...@@ -52,18 +52,15 @@ Example: `lib/**/*.rs` matches all Rust files under `lib/`. ...@@ -52,18 +52,15 @@ Example: `lib/**/*.rs` matches all Rust files under `lib/`.
## Adding a New Filter Group ## Adding a New Filter Group
If you create a new filter in `filters.yaml`, you must also update the workflows: If you create a new filter in `filters.yaml`, you must also update the shared
changed-files action so the coverage check knows about it:
1. Add the filter to `filters.yaml`
2. Update **both** workflow files to include the new filter in the uncovered files check: 1. Add the filter to `filters.yaml`.
- `.github/workflows/container-validation-backends.yml` 2. Edit `.github/actions/changed-files/action.yml`:
- `.github/workflows/container-validation-dynamo.yml` - Expose the new filter as an output (see the existing `core`, `planner`,
`vllm`, `sglang`, `trtllm`, etc. entries at the top of the file).
In each workflow, find the `COVERED_FILES` line and add your new filter: - Add its `*_all_modified_files` to the `COVERED_FILES` line in the
"Check for uncovered files" step.
```bash
COVERED_FILES=$(echo "... ${{ steps.filter.outputs.YOURFILTER_all_modified_files }} ..." | ...)
```
If you skip this step, CI will fail with "uncovered files" even though your filter exists. If you skip this step, CI will fail with "uncovered files" even though your filter exists.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

name: NVIDIA Dynamo Github Validation

# Triggered only by pushes: main, release branches, and pull-request/<number> branches.
on:
  push:
    branches: [main, 'release/*.*.*', 'pull-request/[0-9]+']

concurrency:
  # Pushes to 'main' key the group on run_id, so every main run gets its own
  # group. Any other ref keys the group on ref_name, so runs for the same
  # PR/branch share a group and a newer run can cancel an older one.
  group: "dynamo-build-test-${{ github.ref_name == 'main' && github.run_id || github.ref_name }}"
  # Never cancel in-progress runs on main.
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  # Quoted so the version stays a string rather than a float.
  CUDA_VERSION: "12.9"
  # Unique per run and per re-run attempt.
  BUILDER_NAME: "b-${{ github.run_id }}-${{ github.run_attempt }}"

jobs:
changed-files:
  runs-on: ubuntu-latest
  # Each filter output is true when the changed-files step reports a match, and
  # is forced to true on 'main' and 'release/*' refs so that downstream jobs
  # gated on these outputs always run there.
  outputs:
    core: ${{ steps.changes.outputs.core == 'true' || github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
    planner: ${{ steps.changes.outputs.planner == 'true' || github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
    frontend: ${{ steps.changes.outputs.frontend == 'true' || github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
    sglang: ${{ steps.changes.outputs.sglang == 'true' || github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
    vllm: ${{ steps.changes.outputs.vllm == 'true' || github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
    trtllm: ${{ steps.changes.outputs.trtllm == 'true' || github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
    # Re-exported so later jobs can reference the builder name via `needs`.
    builder_name: ${{ steps.export-builder-name.outputs.builder_name }}
  steps:
    # Do not use fetch-depth: 0 — changed-files now works with shallow clone
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    - name: Check for changes
      id: changes
      uses: ./.github/actions/changed-files
      with:
        gh_token: ${{ github.token }}

    # Copies the workflow-level BUILDER_NAME env value into a step output.
    - name: Export builder name
      id: export-builder-name
      run: |
        echo "builder_name=${{ env.BUILDER_NAME }}" >> $GITHUB_OUTPUT
dynamo-status-check:
  # Aggregate gate: passes only when every job listed in `needs` finished with
  # result "success" or "skipped".
  runs-on: ubuntu-latest
  needs: [changed-files, build, rust-checks, mypy, test-parallel, test-sequential, test-generic-gpu]
  # always(): still evaluate the gate when an upstream job failed or was skipped.
  if: always()
  steps:
    - name: "Check all dependent jobs"
      env:
        # Passed through the environment instead of being pasted into the
        # script: the JSON includes job outputs, and a single quote anywhere in
        # it would terminate a '...'-quoted shell string.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # jq -e exits non-zero when the final result is false.
        printf '%s' "$NEEDS_JSON" | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'
build:
  # Builds and pushes the dynamo runtime image, then a test image layered on it
  # (the runtime tag is passed as BASE_IMAGE to container/Dockerfile.test).
  name: Build
  needs: changed-files
  # Run when any of the filter groups exported by changed-files is 'true'.
  if: >-
    needs.changed-files.outputs.core == 'true' ||
    needs.changed-files.outputs.planner == 'true' ||
    needs.changed-files.outputs.frontend == 'true' ||
    needs.changed-files.outputs.sglang == 'true' ||
    needs.changed-files.outputs.vllm == 'true' ||
    needs.changed-files.outputs.trtllm == 'true'
  runs-on: prod-builder-v3
  timeout-minutes: 60
  outputs:
    # Only pass the non-secret tag suffix between jobs (GitHub blanks outputs containing secrets)
    runtime_tag_suffix: ${{ steps.define_image_tag.outputs.runtime_tag_suffix }}
    test_tag_suffix: ${{ steps.define_image_tag.outputs.test_tag_suffix }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        lfs: true

    - name: Initialize Dynamo Builder
      uses: ./.github/actions/init-dynamo-builder
      with:
        builder_name: ${{ needs.changed-files.outputs.builder_name }}
        flavor: general
        arch: linux/amd64

    - name: Docker Login
      if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
      uses: ./.github/actions/docker-login
      with:
        ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
        azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
        azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
        azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}

    # Emits four step outputs: two tag suffixes (safe to export from the job)
    # and two full remote tags (contain the ECR hostname, used only in this job).
    - name: Define Image Tag
      id: define_image_tag
      env:
        ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
        COMMIT_SHA: ${{ github.sha }}
      run: |
        # CUDA_VERSION comes from the workflow-level env; keep the major part only.
        CUDA_MAJOR="${CUDA_VERSION%%.*}"
        RUNTIME_TAG_SUFFIX="${COMMIT_SHA}-dynamo-runtime-cuda${CUDA_MAJOR}-amd64"
        TEST_TAG_SUFFIX="${COMMIT_SHA}-dynamo-test-cuda${CUDA_MAJOR}-amd64"
        {
          echo "runtime_tag_suffix=${RUNTIME_TAG_SUFFIX}"
          echo "test_tag_suffix=${TEST_TAG_SUFFIX}"
          echo "runtime_remote_tag=${ECR_HOSTNAME}/ai-dynamo/dynamo:${RUNTIME_TAG_SUFFIX}"
          echo "test_remote_tag=${ECR_HOSTNAME}/ai-dynamo/dynamo:${TEST_TAG_SUFFIX}"
        } >> "$GITHUB_OUTPUT"

    - name: Generate Dockerfile
      shell: bash
      run: |
        echo "Generating Dockerfile for target: runtime and framework: dynamo"
        python ./container/render.py \
          --target=runtime \
          --framework=dynamo \
          --platform=amd64 \
          --cuda-version="${CUDA_VERSION}" \
          --show-result \
          --output-short-filename

    - name: Build and Push Runtime Image
      uses: ./.github/actions/docker-remote-build
      with:
        image_tag: ${{ steps.define_image_tag.outputs.runtime_remote_tag }}
        framework: dynamo
        target: runtime
        platform: amd64
        cuda_version: ${{ env.CUDA_VERSION }}
        ci_token: ${{ secrets.CI_TOKEN }}
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
        sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
        push_image: 'true'

    - name: Build and Push Test Image
      env:
        ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
        # Step outputs reach the script through the environment so they can be
        # quoted like any other shell variable.
        RUNTIME_REMOTE_TAG: ${{ steps.define_image_tag.outputs.runtime_remote_tag }}
        TEST_REMOTE_TAG: ${{ steps.define_image_tag.outputs.test_remote_tag }}
      shell: bash
      run: |
        REPO="${ECR_HOSTNAME}/ai-dynamo/dynamo"
        CACHE_TAG="test-dynamo-cuda${CUDA_VERSION%%.*}-amd64-cache"
        # Array instead of a word-split string: each flag/value stays one argument.
        # Always read from both the main and release caches.
        cache_args=(
          --cache-from "type=registry,ref=${REPO}:main-${CACHE_TAG}"
          --cache-from "type=registry,ref=${REPO}:release-${CACHE_TAG}"
        )
        # Only main and release refs write the cache back.
        if [[ "$GITHUB_REF_NAME" == "main" ]]; then
          cache_args+=(--cache-to "type=registry,ref=${REPO}:main-${CACHE_TAG},mode=max")
        elif [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
          cache_args+=(--cache-to "type=registry,ref=${REPO}:release-${CACHE_TAG},mode=max")
        fi
        docker buildx build \
          --progress=plain \
          --push \
          --platform linux/amd64 \
          -f container/Dockerfile.test \
          --build-arg "BASE_IMAGE=${RUNTIME_REMOTE_TAG}" \
          "${cache_args[@]}" \
          -t "${TEST_REMOTE_TAG}" .
rust-checks:
  # Runs cargo fmt / clippy / test inside the runtime image produced by `build`.
  name: Rust Checks
  needs: [changed-files, build]
  if: needs.changed-files.outputs.core == 'true'
  runs-on: prod-tester-amd-gpu-v1
  timeout-minutes: 30
  env:
    CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_rust_dynamo
    # Rebuilt here from secrets plus the non-secret suffix exported by `build`.
    IMAGE_TAG: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.runtime_tag_suffix }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}

    - name: Pull runtime image
      run: |
        source ./.github/scripts/retry_docker.sh
        retry_pull "${IMAGE_TAG}"

    - name: Run Rust checks (block-manager + media-ffmpeg + integration tests)
      env:
        # Secrets are bound to environment variables here and forwarded to the
        # container by name (`-e VAR`), so their values are never written into
        # the rendered script or the docker command line.
        SCCACHE_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
        SCCACHE_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      run: |
        docker run --rm --runtime=nvidia --gpus all --user root -w /workspace/lib/llm \
          --name "${CONTAINER_ID}_rust_checks" \
          -e SCCACHE_BUCKET \
          -e SCCACHE_REGION \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          "${IMAGE_TAG}" \
          bash -ec 'ARCH_ALT=x86_64 /workspace/container/use-sccache.sh install && \
            eval $(/workspace/container/use-sccache.sh setup-env) && \
            rustup component add rustfmt clippy && \
            cargo fmt -- --check && \
            cargo clippy --features block-manager,media-ffmpeg,testing-nixl,integration --no-deps --all-targets -- -D warnings && \
            cargo test --locked --all-targets --features=block-manager,media-ffmpeg,testing-nixl,integration -- --nocapture && \
            cargo clippy -p kvbm-physical --no-deps --all-targets -- -D warnings && \
            cargo test --locked -p kvbm-physical --features testing-kvbm -- --nocapture --test-threads=4 && \
            /workspace/container/use-sccache.sh show-stats "Rust Checks"'
mypy:
  # Type-checks the Python sources inside the test image produced by `build`.
  name: Mypy
  needs: [changed-files, build]
  if: >-
    needs.changed-files.outputs.core == 'true' ||
    needs.changed-files.outputs.planner == 'true' ||
    needs.changed-files.outputs.frontend == 'true' ||
    needs.changed-files.outputs.sglang == 'true' ||
    needs.changed-files.outputs.vllm == 'true' ||
    needs.changed-files.outputs.trtllm == 'true'
  runs-on: prod-tester-amd-v1
  timeout-minutes: 15
  env:
    IMAGE_TAG: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_suffix }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}

    - name: Pull test image
      run: |
        source ./.github/scripts/retry_docker.sh
        retry_pull ${{ env.IMAGE_TAG }}

    # Two containers, run back to back: first the component packages, then the
    # Python bindings package. The three backend lines are rendered by GitHub
    # before the script runs: each becomes either a TARGETS append (backend
    # changed) or the shell no-op ':' (backend unchanged).
    - name: Run mypy
      run: |
        docker run --rm -w /workspace \
          --name mypy_${{ github.run_id }}_${{ github.run_attempt }} \
          ${{ env.IMAGE_TAG }} \
          bash -c '
            MYPYPATH=components/src:lib/bindings/python/src
            # Always check shared (non-backend) code
            TARGETS=$(find components/src/dynamo -maxdepth 1 -mindepth 1 -type d ! -name sglang ! -name vllm ! -name trtllm | sort | tr "\n" " ")
            # Add only the backends that changed
            ${{ needs.changed-files.outputs.sglang == 'true' && 'TARGETS="$TARGETS components/src/dynamo/sglang"' || ':' }}
            ${{ needs.changed-files.outputs.vllm == 'true' && 'TARGETS="$TARGETS components/src/dynamo/vllm"' || ':' }}
            ${{ needs.changed-files.outputs.trtllm == 'true' && 'TARGETS="$TARGETS components/src/dynamo/trtllm"' || ':' }}
            MYPYPATH=$MYPYPATH mypy --explicit-package-bases $TARGETS
          '
        docker run --rm -w /workspace \
          --name mypy_bindings_${{ github.run_id }}_${{ github.run_attempt }} \
          ${{ env.IMAGE_TAG }} \
          bash -c 'MYPYPATH=lib/bindings/python/src mypy -p dynamo'
test-parallel:
  # CPU-only (gpu_0) pre-merge tests marked `parallel`, excluding backend tests.
  name: Pytest (parallel)
  needs: [changed-files, build, mypy]
  if: >-
    needs.changed-files.outputs.core == 'true' ||
    needs.changed-files.outputs.planner == 'true' ||
    needs.changed-files.outputs.frontend == 'true'
  runs-on: prod-builder-amd-v1
  timeout-minutes: 30
  env:
    IMAGE_TAG: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_suffix }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}

    - name: Pull test image
      run: |
        source ./.github/scripts/retry_docker.sh
        retry_pull ${{ env.IMAGE_TAG }}

    - name: Run pytest (parallel tests with xdist)
      uses: ./.github/actions/pytest
      with:
        image_tag: ${{ env.IMAGE_TAG }}
        test_suite_name: dynamo
        test_type: 'pre_merge_parallel'
        pytest_marks: 'pre_merge and parallel and not (vllm or sglang or trtllm) and (gpu_0)'
        platform_arch: amd64
        parallel_mode: '4'
        dind_as_sidecar: 'false'
        hf_token: ${{ secrets.HF_TOKEN }}
test-sequential:
  # CPU-only (gpu_0) pre-merge tests NOT marked `parallel`, excluding backend tests.
  name: Pytest (sequential)
  needs: [changed-files, build, mypy]
  if: >-
    needs.changed-files.outputs.core == 'true' ||
    needs.changed-files.outputs.planner == 'true' ||
    needs.changed-files.outputs.frontend == 'true'
  runs-on: prod-builder-amd-v1
  timeout-minutes: 30
  env:
    IMAGE_TAG: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_suffix }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}

    - name: Pull test image
      run: |
        source ./.github/scripts/retry_docker.sh
        retry_pull ${{ env.IMAGE_TAG }}

    - name: Run pytest (sequential tests)
      uses: ./.github/actions/pytest
      with:
        image_tag: ${{ env.IMAGE_TAG }}
        test_suite_name: dynamo
        test_type: 'pre_merge_sequential'
        pytest_marks: 'pre_merge and not parallel and not (vllm or sglang or trtllm) and (gpu_0)'
        platform_arch: amd64
        parallel_mode: 'none'
        dind_as_sidecar: 'false'
        hf_token: ${{ secrets.HF_TOKEN }}
test-generic-gpu:
  # Single-GPU (gpu_1) pre-merge tests; only gated on the `core` filter group.
  name: Pytest (GPU)
  needs: [changed-files, build, mypy]
  if: needs.changed-files.outputs.core == 'true'
  runs-on: prod-tester-amd-gpu-v1
  timeout-minutes: 30
  env:
    IMAGE_TAG: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_suffix }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}

    - name: Pull test image
      run: |
        source ./.github/scripts/retry_docker.sh
        retry_pull ${{ env.IMAGE_TAG }}

    - name: Run pytest (gpu)
      uses: ./.github/actions/pytest
      with:
        image_tag: ${{ env.IMAGE_TAG }}
        pytest_marks: "pre_merge and none and gpu_1"
        # Was `framework: dynamo`. Renamed to match the test-parallel and
        # test-sequential jobs, which pass `test_suite_name` to this same action.
        # NOTE(review): confirm against .github/actions/pytest/action.yml inputs.
        test_suite_name: dynamo
        test_type: "pre_merge_gpu"
        platform_arch: amd64
        hf_token: ${{ secrets.HF_TOKEN }}
        parallel_mode: 'none'
        dind_as_sidecar: 'true'
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Dynamo Pipeline — single entry point for building the dynamo runtime image
# and running all of its associated checks (rust, mypy, pytest parallel/
# sequential/GPU).
name: Dynamo Pipeline

# Reusable workflow: it has no triggers of its own and is invoked by other
# workflows via `uses:`.
on:
  workflow_call:
    inputs:
      builder_name:
        description: 'BuildKit builder name'
        required: true
        type: string
      cuda_version:
        description: 'CUDA version for the runtime image (e.g. 12.9)'
        required: false
        type: string
        default: '12.9'
      fresh_builder:
        description: 'Always create a fresh K8s BuildKit builder (for nightly)'
        required: false
        type: boolean
        default: false
      build_timeout_minutes:
        description: 'Image build timeout in minutes (forwarded to shared-build-image.yml)'
        required: false
        type: number
        default: 30
      no_cache:
        description: 'Disable BuildKit cache-from/to. Nightly sets true for regression detection.'
        required: false
        type: boolean
        default: false
      cpu_parallel_test_markers:
        description: 'Pytest marker expression for the CPU-only "parallel" test job'
        required: true
        type: string
      cpu_sequential_test_markers:
        description: 'Pytest marker expression for the CPU-only "sequential" test job'
        required: true
        type: string
      gpu_test_markers:
        description: 'Pytest marker expression for the GPU test job'
        required: true
        type: string
    secrets:
      AWS_DEFAULT_REGION:
        required: true
      AWS_ACCOUNT_ID:
        required: true
      AZURE_ACR_HOSTNAME:
        required: true
      AZURE_ACR_USER:
        required: true
      AZURE_ACR_PASSWORD:
        required: true
      SCCACHE_S3_BUCKET:
        required: false
      HF_TOKEN:
        required: false
jobs:
image:
  # Delegates the runtime image build to the shared build workflow, forwarding
  # this workflow's builder/cache/timeout inputs unchanged.
  uses: ./.github/workflows/shared-build-image.yml
  secrets: inherit
  with:
    framework: dynamo
    target: runtime
    platform: 'linux/amd64,linux/arm64'
    # Passed as a one-element JSON array string, e.g. ["12.9"].
    cuda_version: "[\"${{ inputs.cuda_version }}\"]"
    builder_name: ${{ inputs.builder_name }}
    fresh_builder: ${{ inputs.fresh_builder }}
    no_cache: ${{ inputs.no_cache }}
    build_timeout_minutes: ${{ inputs.build_timeout_minutes }}
# The rust-gpu and mypy jobs run inside the built images because they depend
# on native libs (NIXL, ffmpeg, CUDA) and pinned Python package versions that
# aren't reproducible on plain ubuntu-latest runners.
rust-gpu:
  # Runs cargo fmt / clippy / test inside the runtime image built by `image`.
  needs: image
  runs-on: prod-tester-amd-gpu-v1
  timeout-minutes: 30
  env:
    CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_rust_dynamo
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    # Tag layout: <sha>-<target_tag_plain>-cuda<major>
    - name: Calculate runtime image tag
      id: image
      shell: bash
      env:
        ECR_REPOSITORY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo
        # Caller-supplied input and upstream output are passed through the
        # environment rather than pasted into the script text.
        CUDA_VERSION: ${{ inputs.cuda_version }}
        COMMIT_SHA: ${{ github.sha }}
        TARGET_TAG_PLAIN: ${{ needs.image.outputs.target_tag_plain }}
      run: |
        CUDA_MAJOR="${CUDA_VERSION%%.*}"
        IMAGE_TAG="${COMMIT_SHA}-${TARGET_TAG_PLAIN}-cuda${CUDA_MAJOR}"
        echo "runtime_image=${ECR_REPOSITORY}:${IMAGE_TAG}" >> "$GITHUB_OUTPUT"

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}

    - name: Pull runtime image
      env:
        RUNTIME_IMAGE: ${{ steps.image.outputs.runtime_image }}
      run: |
        source ./.github/scripts/retry_docker.sh
        retry_pull "${RUNTIME_IMAGE}"

    - name: Run Rust checks (block-manager + media-ffmpeg + integration tests)
      env:
        SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
        AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
        RUNTIME_IMAGE: ${{ steps.image.outputs.runtime_image }}
      run: |
        # AWS_ROLE_ARN and AWS_WEB_IDENTITY_TOKEN_FILE are not set anywhere in
        # this workflow; presumably the runner environment provides them — TODO
        # confirm. Fail with a clear message instead of an obscure `docker -v`
        # error when the token file path is missing.
        : "${AWS_WEB_IDENTITY_TOKEN_FILE:?AWS_WEB_IDENTITY_TOKEN_FILE is not set on this runner}"
        docker run --rm --runtime=nvidia --gpus all --user root -w /workspace/lib/llm \
          --name "${CONTAINER_ID}_rust_checks" \
          -e SCCACHE_BUCKET="${SCCACHE_S3_BUCKET}" \
          -e SCCACHE_REGION="${AWS_DEFAULT_REGION}" \
          -e AWS_ROLE_ARN \
          -e AWS_WEB_IDENTITY_TOKEN_FILE=/run/secrets/aws-token \
          -v "${AWS_WEB_IDENTITY_TOKEN_FILE}:/run/secrets/aws-token:ro" \
          "${RUNTIME_IMAGE}" \
          bash -ec 'ARCH_ALT=x86_64 /workspace/container/use-sccache.sh install && \
            eval $(/workspace/container/use-sccache.sh setup-env) && \
            rustup component add rustfmt clippy && \
            cargo fmt -- --check && \
            cargo clippy --features block-manager,media-ffmpeg,testing-nixl,integration --no-deps --all-targets -- -D warnings && \
            cargo test --locked --all-targets --features=block-manager,media-ffmpeg,testing-nixl,integration -- --nocapture && \
            cargo clippy -p kvbm-physical --no-deps --all-targets -- -D warnings && \
            cargo test --locked -p kvbm-physical --features testing-kvbm -- --nocapture --test-threads=4 && \
            /workspace/container/use-sccache.sh show-stats "Rust Checks"'
mypy:
  # Type-checks every package directory under components/src/dynamo, then the
  # Python bindings, inside the test image built by `image`.
  needs: image
  runs-on: prod-tester-amd-v1
  timeout-minutes: 15
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

    # Tag layout: <sha>-<target_tag_plain>-cuda<major>-test
    - name: Calculate test image tag
      id: image
      shell: bash
      env:
        ECR_REPOSITORY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo
        # Caller-supplied input and upstream output are passed through the
        # environment rather than pasted into the script text.
        CUDA_VERSION: ${{ inputs.cuda_version }}
        COMMIT_SHA: ${{ github.sha }}
        TARGET_TAG_PLAIN: ${{ needs.image.outputs.target_tag_plain }}
      run: |
        CUDA_MAJOR="${CUDA_VERSION%%.*}"
        IMAGE_TAG="${COMMIT_SHA}-${TARGET_TAG_PLAIN}-cuda${CUDA_MAJOR}-test"
        echo "test_image=${ECR_REPOSITORY}:${IMAGE_TAG}" >> "$GITHUB_OUTPUT"

    - name: Docker Login
      uses: ./.github/actions/docker-login
      with:
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}

    - name: Pull test image
      env:
        TEST_IMAGE: ${{ steps.image.outputs.test_image }}
      run: |
        source ./.github/scripts/retry_docker.sh
        retry_pull "${TEST_IMAGE}"

    - name: Run mypy (components)
      env:
        TEST_IMAGE: ${{ steps.image.outputs.test_image }}
      run: |
        docker run --rm -w /workspace \
          --name mypy_${{ github.run_id }}_${{ github.run_attempt }} \
          "${TEST_IMAGE}" \
          bash -c '
            MYPYPATH=components/src:lib/bindings/python/src
            TARGETS=$(find components/src/dynamo -maxdepth 1 -mindepth 1 -type d | sort | tr "\n" " ")
            MYPYPATH=$MYPYPATH mypy --explicit-package-bases $TARGETS
          '

    - name: Run mypy (bindings)
      env:
        TEST_IMAGE: ${{ steps.image.outputs.test_image }}
      run: |
        docker run --rm -w /workspace \
          --name mypy_bindings_${{ github.run_id }}_${{ github.run_attempt }} \
          "${TEST_IMAGE}" \
          bash -c 'MYPYPATH=lib/bindings/python/src mypy -p dynamo'
# TODO: enabling real xdist parallelism causes port conflicts. cpu_parallel_mode='none'
# below runs -n 0, so this job is NOT actually parallel right now.
parallel:
  # CPU-only run of the caller's "parallel" marker expression on amd64 and
  # arm64, via the shared test workflow. GPU tests and the sanity check are off.
  name: test
  needs: image
  uses: ./.github/workflows/shared-test.yml
  secrets: inherit
  with:
    test_suite_name: dynamo
    test_type: parallel
    target_tag_plain: ${{ needs.image.outputs.target_tag_plain }}
    # One-element JSON array string, e.g. ["12.9"].
    cuda_version: "[\"${{ inputs.cuda_version }}\"]"
    platform: '["amd64", "arm64"]'
    amd_runner: prod-tester-amd-v1
    run_sanity_check: false
    run_gpu_tests: false
    run_cpu_only_tests: true
    cpu_only_test_markers: ${{ inputs.cpu_parallel_test_markers }}
    cpu_only_test_timeout_minutes: 30
    cpu_parallel_mode: 'none'
sequential:
  # CPU-only run of the caller's "sequential" marker expression on amd64 and
  # arm64, via the shared test workflow. GPU tests and the sanity check are off.
  name: test
  needs: image
  uses: ./.github/workflows/shared-test.yml
  secrets: inherit
  with:
    test_suite_name: dynamo
    test_type: sequential
    target_tag_plain: ${{ needs.image.outputs.target_tag_plain }}
    # One-element JSON array string, e.g. ["12.9"].
    cuda_version: "[\"${{ inputs.cuda_version }}\"]"
    platform: '["amd64", "arm64"]'
    amd_runner: prod-tester-amd-v1
    run_sanity_check: false
    run_gpu_tests: false
    run_cpu_only_tests: true
    cpu_only_test_markers: ${{ inputs.cpu_sequential_test_markers }}
    cpu_only_test_timeout_minutes: 30
    cpu_parallel_mode: 'none'
gpu:
  # GPU run of the caller's GPU marker expression, amd64 only, via the shared
  # test workflow. CPU-only tests and the sanity check are off.
  name: test
  needs: image
  uses: ./.github/workflows/shared-test.yml
  secrets: inherit
  with:
    test_suite_name: dynamo
    test_type: gpu
    target_tag_plain: ${{ needs.image.outputs.target_tag_plain }}
    # One-element JSON array string, e.g. ["12.9"].
    cuda_version: "[\"${{ inputs.cuda_version }}\"]"
    platform: '["amd64"]'
    amd_runner: prod-tester-amd-gpu-v1
    run_sanity_check: false
    run_cpu_only_tests: false
    run_gpu_tests: true
    gpu_test_markers: ${{ inputs.gpu_test_markers }}
    gpu_test_timeout_minutes: 30
...@@ -119,6 +119,23 @@ jobs: ...@@ -119,6 +119,23 @@ jobs:
multi_gpu_test_timeout_minutes: 120 multi_gpu_test_timeout_minutes: 120
secrets: inherit secrets: inherit
# ============================================================================
# DYNAMO RUNTIME PIPELINE
# ============================================================================
dynamo-pipeline:
  # Nightly variant: fresh builder, BuildKit cache disabled, 90-minute build
  # timeout. The marker expressions carry no pre_merge/post_merge restriction.
  name: dynamo-runtime
  needs: [create-fresh-builder]
  uses: ./.github/workflows/dynamo-pipeline.yml
  secrets: inherit
  with:
    builder_name: ${{ needs.create-fresh-builder.outputs.builder_name }}
    fresh_builder: true
    no_cache: true
    build_timeout_minutes: 90
    cpu_parallel_test_markers: "parallel and not (vllm or sglang or trtllm) and (gpu_0)"
    cpu_sequential_test_markers: "not parallel and not (vllm or sglang or trtllm) and (gpu_0)"
    gpu_test_markers: "none and gpu_1"
# ============================================================================ # ============================================================================
# CLEANUP # CLEANUP
# ============================================================================ # ============================================================================
...@@ -126,7 +143,7 @@ jobs: ...@@ -126,7 +143,7 @@ jobs:
name: Clean K8s builder if exists name: Clean K8s builder if exists
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
if: always() if: always()
needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline, create-fresh-builder] needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline, dynamo-pipeline, create-fresh-builder]
permissions: permissions:
contents: read contents: read
steps: steps:
...@@ -149,7 +166,7 @@ jobs: ...@@ -149,7 +166,7 @@ jobs:
name: Notify Slack name: Notify Slack
runs-on: ubuntu-slim runs-on: ubuntu-slim
if: always() && failure() if: always() && failure()
needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline ] needs: [ vllm-pipeline, sglang-pipeline, trtllm-pipeline, dynamo-pipeline ]
permissions: permissions:
contents: read contents: read
steps: steps:
......
...@@ -117,6 +117,19 @@ jobs: ...@@ -117,6 +117,19 @@ jobs:
multi_gpu_test_timeout_minutes: 60 multi_gpu_test_timeout_minutes: 60
secrets: inherit secrets: inherit
# ============================================================================
# DYNAMO RUNTIME PIPELINE
# ============================================================================
dynamo-pipeline:
  # Post-merge variant: selects tests marked pre_merge or post_merge and relies
  # on the reusable workflow's defaults for everything not set here.
  name: dynamo-runtime
  uses: ./.github/workflows/dynamo-pipeline.yml
  secrets: inherit
  with:
    # Unique per run and per re-run attempt.
    builder_name: "b-${{ github.run_id }}-${{ github.run_attempt }}"
    cpu_parallel_test_markers: "(pre_merge or post_merge) and parallel and not (vllm or sglang or trtllm) and (gpu_0)"
    cpu_sequential_test_markers: "(pre_merge or post_merge) and not parallel and not (vllm or sglang or trtllm) and (gpu_0)"
    gpu_test_markers: "(pre_merge or post_merge) and none and gpu_1"
# ============================================================================ # ============================================================================
# DEV PIPELINES # DEV PIPELINES
# ============================================================================ # ============================================================================
...@@ -434,7 +447,7 @@ jobs: ...@@ -434,7 +447,7 @@ jobs:
name: Clean K8s builder if exists name: Clean K8s builder if exists
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
if: always() if: always()
needs: [planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-dev-pipeline, sglang-dev-pipeline, trtllm-dev-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline, operator, frontend-image] needs: [planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, dynamo-pipeline, vllm-dev-pipeline, sglang-dev-pipeline, trtllm-dev-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline, operator, frontend-image]
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
...@@ -455,7 +468,7 @@ jobs: ...@@ -455,7 +468,7 @@ jobs:
name: Notify Slack name: Notify Slack
runs-on: ubuntu-slim runs-on: ubuntu-slim
if: always() && failure() if: always() && failure()
needs: [ planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, vllm-dev-pipeline, sglang-dev-pipeline, trtllm-dev-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline, operator, frontend-image, deploy-operator, deploy-test-vllm, deploy-test-sglang, deploy-test-trtllm, deploy-test-gaie ] needs: [ planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, dynamo-pipeline, vllm-dev-pipeline, sglang-dev-pipeline, trtllm-dev-pipeline, vllm-efa-pipeline, trtllm-efa-pipeline, operator, frontend-image, deploy-operator, deploy-test-vllm, deploy-test-sglang, deploy-test-trtllm, deploy-test-gaie ]
permissions: permissions:
contents: read contents: read
steps: steps:
......
...@@ -30,6 +30,7 @@ jobs: ...@@ -30,6 +30,7 @@ jobs:
vllm: ${{ steps.changes.outputs.vllm }} vllm: ${{ steps.changes.outputs.vllm }}
sglang: ${{ steps.changes.outputs.sglang }} sglang: ${{ steps.changes.outputs.sglang }}
trtllm: ${{ steps.changes.outputs.trtllm }} trtllm: ${{ steps.changes.outputs.trtllm }}
frontend: ${{ steps.changes.outputs.frontend }}
builder_name: ${{ steps.export-builder-name.outputs.builder_name }} builder_name: ${{ steps.export-builder-name.outputs.builder_name }}
steps: steps:
- name: Checkout code - name: Checkout code
...@@ -83,6 +84,17 @@ jobs: ...@@ -83,6 +84,17 @@ jobs:
run: | run: |
echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped", "cancelled"] | any($result == .))' echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped", "cancelled"] | any($result == .))'
dynamo-status-check:
  # Aggregate gate for the dynamo jobs: passes only when every job listed in
  # `needs` finished with result "success" or "skipped".
  runs-on: ubuntu-slim
  needs:
    - changed-files
    - dynamo-pipeline
  # always(): still evaluate the gate when an upstream job failed or was skipped.
  if: always()
  steps:
    - name: Check all dynamo jobs
      env:
        # Passed through the environment instead of being pasted into the
        # script: the JSON includes job outputs, and a single quote anywhere in
        # it would terminate a '...'-quoted shell string.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # jq -e exits non-zero when the final result is false.
        printf '%s' "$NEEDS_JSON" | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'
# ============================================================================ # ============================================================================
# Operator # Operator
# ============================================================================ # ============================================================================
...@@ -371,6 +383,28 @@ jobs: ...@@ -371,6 +383,28 @@ jobs:
run_gpu_tests: false run_gpu_tests: false
secrets: inherit secrets: inherit
# ============================================================================
# DYNAMO RUNTIME PIPELINE
# ============================================================================
dynamo-pipeline:
  # PR variant: runs only when one of the listed filter groups changed, and
  # selects tests marked pre_merge.
  name: dynamo-runtime
  needs: [changed-files]
  if: >-
    needs.changed-files.outputs.core == 'true' ||
    needs.changed-files.outputs.planner == 'true' ||
    needs.changed-files.outputs.frontend == 'true' ||
    needs.changed-files.outputs.vllm == 'true' ||
    needs.changed-files.outputs.sglang == 'true' ||
    needs.changed-files.outputs.trtllm == 'true'
  uses: ./.github/workflows/dynamo-pipeline.yml
  secrets: inherit
  with:
    builder_name: ${{ needs.changed-files.outputs.builder_name }}
    cpu_parallel_test_markers: "pre_merge and parallel and not (vllm or sglang or trtllm) and (gpu_0)"
    cpu_sequential_test_markers: "pre_merge and not parallel and not (vllm or sglang or trtllm) and (gpu_0)"
    gpu_test_markers: "pre_merge and none and gpu_1"
# ============================================================================ # ============================================================================
# IMAGE COMPLIANCE PIPELINES # IMAGE COMPLIANCE PIPELINES
# ============================================================================ # ============================================================================
...@@ -586,6 +620,7 @@ jobs: ...@@ -586,6 +620,7 @@ jobs:
- sglang-multi-gpu-test - sglang-multi-gpu-test
- trtllm-copy-to-acr - trtllm-copy-to-acr
- trtllm-multi-gpu-test - trtllm-multi-gpu-test
- dynamo-pipeline
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment