Unverified Commit 65a1e1b4 authored by Anant Sharma, committed by GitHub
Browse files

ci: conditional backend github workflow (#3141)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent a69efbd1
# Composite action: logs in to nvcr.io (when allowed), prunes the local Docker
# state, then builds a container image with ./container/build.sh.
# The tag that was built is exposed as the `image_tag` output.
#
# All action inputs are handed to the shell through `env:` instead of being
# expanded inline inside `run:` scripts, so an input value can never be
# interpreted as shell syntax.
name: 'Docker Build'
description: 'Build Dynamo container images'
inputs:
  framework:
    description: 'Framework to build'
    required: true
    default: 'vllm'
  target:
    description: 'Target to build'
    required: false
    default: 'runtime'
  image_tag:
    description: 'Custom image tag (optional, defaults to framework:latest)'
    required: false
  ngc_ci_access_token:
    description: 'NGC CI Access Token'
    required: false
  ci_token:
    description: 'CI Token'
    required: false
  aws_default_region:
    description: 'AWS Default Region'
    required: false
  sccache_s3_bucket:
    description: 'SCCache S3 Bucket'
    required: false
  aws_access_key_id:
    description: 'AWS Access Key ID'
    required: false
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false
outputs:
  image_tag:
    description: 'Image Tag'
    value: ${{ steps.build.outputs.image_tag }}
runs:
  using: "composite"
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    # Only attempted for pushes and for pull requests whose head repository is
    # this repository.
    - name: Login to NGC
      if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
      shell: bash
      env:
        NGC_CI_ACCESS_TOKEN: ${{ inputs.ngc_ci_access_token }}
      run: |
        # '$oauthtoken' is single-quoted on purpose: it is the literal username.
        echo "$NGC_CI_ACCESS_TOKEN" | docker login nvcr.io -u '$oauthtoken' --password-stdin
    # Runs before the build; `always()` keeps it running even if login failed.
    - name: Cleanup
      if: always()
      shell: bash
      run: |
        docker system prune -af
    - name: Build image
      id: build
      shell: bash
      env:
        GITHUB_TOKEN: ${{ inputs.ci_token }}
        AWS_DEFAULT_REGION: ${{ inputs.aws_default_region }}
        SCCACHE_S3_BUCKET: ${{ inputs.sccache_s3_bucket }}
        AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }}
        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }}
        # Action inputs, passed as data rather than spliced into the script.
        INPUT_IMAGE_TAG: ${{ inputs.image_tag }}
        INPUT_FRAMEWORK: ${{ inputs.framework }}
        INPUT_TARGET: ${{ inputs.target }}
      run: |
        # Determine image tag: explicit input wins, otherwise <framework>:latest
        if [ -n "$INPUT_IMAGE_TAG" ]; then
          IMAGE_TAG="$INPUT_IMAGE_TAG"
        else
          IMAGE_TAG="${INPUT_FRAMEWORK}:latest"
        fi
        # Publish the tag so `outputs.image_tag` above can read it.
        echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"
        ./container/build.sh --tag "$IMAGE_TAG" \
          --target "$INPUT_TARGET" \
          --framework "$INPUT_FRAMEWORK" \
          --use-sccache \
          --sccache-bucket "$SCCACHE_S3_BUCKET" \
          --sccache-region "$AWS_DEFAULT_REGION"
# Composite action: runs pytest inside an already-built container image.
#
# The image tag and the marker expression are passed through `env:` (and then
# into the container with `docker run -e`) instead of being expanded inline,
# so quotes or shell metacharacters in either input cannot alter the command.
name: 'Pytest'
description: 'Run pytest on pre-built container images'
inputs:
  pytest_marks:
    description: 'Pytest marks'
    required: true
    default: 'e2e and vllm and gpu_1 and not slow'
  image_tag:
    description: 'Image Tag to run tests on'
    required: true
runs:
  using: "composite"
  steps:
    - name: Run tests
      shell: bash
      env:
        CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}
        PYTEST_XML_FILE: pytest_test_report.xml
        # NOTE(review): HF_HOME is set for this step only; it is not forwarded
        # to the container below - confirm whether that is intended.
        HF_HOME: /runner/_work/_temp
        IMAGE_TAG: ${{ inputs.image_tag }}
        PYTEST_MARKS: ${{ inputs.pytest_marks }}
      run: |
        # `-e NAME` without a value forwards the step's variable into the
        # container; the single-quoted script is expanded by the inner bash.
        docker run --runtime=nvidia --rm --gpus all -w /workspace \
          --network host \
          --name "${CONTAINER_ID}_pytest" \
          -e PYTEST_XML_FILE \
          -e PYTEST_MARKS \
          "$IMAGE_TAG" \
          bash -c 'pytest -xsv --basetemp=/tmp --junitxml="$PYTEST_XML_FILE" -m "$PYTEST_MARKS"'
# Path filters, one named filter per top-level key.
# Each key maps to a list of glob patterns; anchors (&name) let one list be
# reused inside another via an alias (*name).

# Documentation-only paths.
docs: &docs
  - "docs/**"
  - "**/*.md"
  - "**/*.rst"

# CI configuration itself.
ci: &ci
  - ".github/workflows/**"
  - ".github/filters.yaml"
  - ".github/actions/**"

# Anything that counts as a code change. The `ci` list is pulled in through
# its alias as a nested list entry (presumably flattened by the filter tool -
# verify against its documentation).
has_code_changes:
  - *ci
  - "benchmarks/**"
  - "components/**"
  - "container/**"
  - "deploy/**"
  - "examples/**"
  - "launch/**"
  - "lib/**"
  - "recipes/**"
  - "tests/**"
  - "*.toml"
  - "*.lock"
  - "*.py"
  - "*.rs"

# Backend-specific filters.
# NOTE(review): unlike sglang and trtllm, the vllm list does not include
# 'container/build.sh' - confirm whether that is intentional.
vllm: &vllm
  - "container/Dockerfile.vllm"
  - "container/deps/requirements.vllm.txt"
  - "container/deps/vllm/**"
  - "components/backends/vllm/**"
  - "tests/serve/test_vllm.py"

sglang: &sglang
  - "container/Dockerfile.sglang"
  - "container/Dockerfile.sglang-wideep"
  - "components/backends/sglang/**"
  - "container/build.sh"
  - "tests/serve/test_sglang.py"

trtllm: &trtllm
  - "container/Dockerfile.trtllm"
  - "components/backends/trtllm/**"
  - "container/build.sh"
  - "container/build_trtllm_wheel.sh"
  - "container/deps/**"
  - "tests/serve/test_trtllm.py"

sdk:
  - "deploy/**"
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
name: NVIDIA Dynamo Backends Github Validation
name: Docker Build and Test
on:
push:
......@@ -9,70 +9,104 @@ on:
- main
- "pull-request/[0-9]+"
concurrency:
group: ${{ github.workflow }}-build-test-${{ github.ref_name || github.run_id }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
jobs:
build-test:
# Gate job: evaluates the shared path filters and publishes whether the
# `has_code_changes` filter matched, so later jobs can condition on it.
changed-files:
  runs-on: ubuntu-latest
  outputs:
    # Forwarded from the `filter` step below.
    has_code_changes: ${{ steps.filter.outputs.has_code_changes }}
  steps:
    - name: Checkout code
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
    - name: Check for changes
      id: filter
      uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36  # v3.0.2
      with:
        # Filter definitions live in a file rather than inline.
        filters: '.github/filters.yaml'
# Aggregate status job: always runs after the three backend jobs and succeeds
# only if every one of them ended as "success" or "skipped".
backend-status-check:
  runs-on: ubuntu-latest
  needs: [vllm, sglang, trtllm]
  if: always()
  steps:
    - name: "Check all dependent jobs"
      env:
        # Pass the JSON as data; expanding it inline inside a single-quoted
        # shell string would break if any value ever contained a quote.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # `jq -e` sets the exit status from the boolean result, failing the
        # step when any dependency result is not success/skipped.
        printf '%s\n' "$NEEDS_JSON" | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'
vllm:
runs-on: gpu-l40-amd64
strategy:
fail-fast: false
matrix:
framework: [vllm, sglang, trtllm]
include:
- framework: vllm
needs: changed-files
if: needs.changed-files.outputs.has_code_changes == 'true'
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Build Container
id: build-image
uses: ./.github/actions/docker-build
with:
framework: vllm
target: runtime
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
- name: Run tests
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "e2e and vllm and gpu_1 and not slow"
- framework: sglang
target: runtime
pytest_marks: "e2e and sglang and gpu_1 and not slow"
- framework: trtllm
target: runtime
pytest_marks: "e2e and trtllm_marker and gpu_1 and not slow"
# Do not cancel main branch runs
concurrency:
group: ${{ github.workflow }}-${{ matrix.framework }}-build-test-${{ github.ref_name || github.run_id }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
name: Build and Test - ${{ matrix.framework }}
env:
CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
PYTEST_XML_FILE: pytest_test_report.xml
FRAMEWORK: ${{ matrix.framework }}
TARGET: ${{ matrix.target }}
PYTEST_MARKS: ${{ matrix.pytest_marks }}
sglang:
runs-on: gpu-l40-amd64
needs: changed-files
if: needs.changed-files.outputs.has_code_changes == 'true'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to NGC
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
run: |
echo "${{ secrets.NGC_CI_ACCESS_TOKEN }}" | docker login nvcr.io -u '$oauthtoken' --password-stdin
- name: Cleanup
if: always()
run: |
docker system prune -af
- name: Build image
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: |
./container/build.sh --tag ${{ matrix.framework }}:latest \
--target ${{ matrix.target }} \
--framework ${{ matrix.framework }} \
--use-sccache \
--sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION"
- name: Run pytest
env:
HF_HOME: /runner/_work/_temp
run: |
docker run --runtime=nvidia --rm --gpus all -w /workspace \
--network host \
--name ${{ env.CONTAINER_ID }}_pytest \
${{ matrix.framework }}:latest \
bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Build Container
id: build-image
uses: ./.github/actions/docker-build
with:
framework: sglang
target: runtime
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
- name: Run tests
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "e2e and sglang and gpu_1"
# TRT-LLM backend job: builds the runtime image with the local docker-build
# action, then runs the tests selected by the marker expression against it.
# Skipped unless the changed-files job reported code changes.
trtllm:
  needs: changed-files
  if: needs.changed-files.outputs.has_code_changes == 'true'
  runs-on: gpu-l40-amd64
  steps:
    - name: Checkout code
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0

    - name: Build Container
      id: build-image
      uses: ./.github/actions/docker-build
      with:
        framework: 'trtllm'
        target: 'runtime'
        # Credentials are forwarded as inputs to the local action.
        ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
        ci_token: ${{ secrets.CI_TOKEN }}
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
        aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    - name: Run tests
      uses: ./.github/actions/pytest
      with:
        # Test the exact image produced by the build step above.
        image_tag: ${{ steps.build-image.outputs.image_tag }}
        pytest_marks: 'e2e and trtllm_marker and gpu_1 and not slow'
......@@ -51,28 +51,7 @@ jobs:
id: src_changes
uses: dorny/paths-filter@v3
with:
filters: |
vllm:
- 'container/Dockerfile.vllm'
- 'container/deps/requirements.vllm.txt'
- 'container/deps/vllm/**'
- 'components/backends/vllm/**'
- 'tests/serve/test_vllm.py'
trtllm:
- 'container/Dockerfile.trtllm'
- 'components/backends/trtllm/**'
- 'container/build.sh'
- 'container/build_trtllm_wheel.sh'
- 'container/deps/**'
- 'tests/serve/test_trtllm.py'
sdk:
- 'deploy/**'
sglang:
- 'container/Dockerfile.sglang'
- 'container/Dockerfile.sglang-deepep'
- 'components/backends/sglang/**'
- 'container/build.sh'
- 'tests/serve/test_sglang.py'
filters: .github/filters.yaml
- name: Check if Validation Workflow has run
id: check_workflow
uses: actions/github-script@v6
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment