# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Nightly CI pipeline.
#
# Invokes the reusable build-test-distribute-flavor-matrix workflow once per
# framework (vllm, sglang, trtllm) and posts a Slack message listing the failed
# jobs when any of those pipelines fails.

name: Nightly CI Pipeline

on:
  schedule:
    # Every day at 12:00 AM PST (08:00 UTC). Cron schedules are evaluated in
    # UTC, so this is 1:00 AM local time while daylight saving time is active.
    - cron: '0 8 * * *'
  workflow_dispatch:  # Allow manual triggering for testing

# Default token scope for every job; jobs that need more declare it themselves.
permissions:
  contents: read

jobs:
  # ============================================================================
  # FRAMEWORK PIPELINES (Build → Test → Copy)
  # ============================================================================

  # ============================================================================
  # VLLM PIPELINE
  # ============================================================================
  vllm-pipeline:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: vllm
      target: runtime
      no_cache: true
      # Matrices are passed as JSON-encoded strings.
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["12.9", "13.0"]'
      # On main, pass a moving tag and a per-commit tag; on any other ref both
      # expressions evaluate to empty strings.
      extra_tags: |
        ${{ github.ref_name == 'main' && 'main-vllm' || '' }}
        ${{ github.ref_name == 'main' && format('main-vllm-{0}', github.sha) || '' }}
      # Unique per run and per re-run attempt.
      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
      build_timeout_minutes: 180
      cpu_only_test_markers: 'vllm and gpu_0'
      single_gpu_test_markers: 'vllm and gpu_1'
      single_gpu_test_timeout_minutes: 180
      multi_gpu_test_markers: 'vllm and (gpu_2 or gpu_4)'
      multi_gpu_test_timeout_minutes: 120
    secrets: inherit

  # ============================================================================
  # SGLANG PIPELINE
  # ============================================================================
  sglang-pipeline:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: sglang
      target: runtime
      no_cache: true
      # Matrices are passed as JSON-encoded strings.
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["12.9", "13.0"]'
      # On main, pass a moving tag and a per-commit tag; on any other ref both
      # expressions evaluate to empty strings.
      extra_tags: |
        ${{ github.ref_name == 'main' && 'main-sglang' || '' }}
        ${{ github.ref_name == 'main' && format('main-sglang-{0}', github.sha) || '' }}
      # Unique per run and per re-run attempt.
      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
      build_timeout_minutes: 180
      cpu_only_test_markers: 'sglang and gpu_0'
      single_gpu_test_markers: 'sglang and gpu_1'
      single_gpu_test_timeout_minutes: 180
      multi_gpu_test_markers: 'sglang and (gpu_2 or gpu_4)'
      multi_gpu_test_timeout_minutes: 120
    secrets: inherit

  # ============================================================================
  # TRTLLM PIPELINE
  # ============================================================================
  trtllm-pipeline:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: trtllm
      target: runtime
      no_cache: true
      # Matrices are passed as JSON-encoded strings. Unlike the other two
      # pipelines, trtllm is built for a single CUDA version.
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["13.1"]'
      # On main, pass a moving tag and a per-commit tag; on any other ref both
      # expressions evaluate to empty strings.
      extra_tags: |
        ${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
        ${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
      # Unique per run and per re-run attempt.
      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
      build_timeout_minutes: 180
      cpu_only_test_markers: 'trtllm and gpu_0'
      single_gpu_test_markers: 'trtllm and gpu_1'
      single_gpu_test_timeout_minutes: 180
      multi_gpu_test_markers: 'trtllm and (gpu_2 or gpu_4)'
      multi_gpu_test_timeout_minutes: 120
    secrets: inherit

  ############################## SLACK NOTIFICATION ##############################
  # Runs only when at least one of the framework pipelines failed, collects the
  # names of the failed jobs from the Actions API and posts them to Slack.
  notify-slack:
    name: Notify Slack
    runs-on: prod-builder-amd-v1
    if: always() && failure()
    needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline]
    permissions:
      contents: read
      # The step below lists this run's jobs through the Actions REST API;
      # outside of public repositories that call needs read access to Actions.
      actions: read
    steps:
      - name: Get Failed jobs
        shell: bash
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          JOBS_JSON=$(mktemp)

          # NOTE(review): only the first page (up to 100 jobs) is fetched, so
          # failures beyond that are not reported - confirm the run stays
          # below that size or add pagination.
          curl -sSL \
            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
            -H "Accept: application/vnd.github+json" \
            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100" \
            >"$JOBS_JSON"

          # One entry per failed job. Job names are split on " / "; names with
          # more than two segments are shortened to "<first> > <last>",
          # otherwise only the last segment is kept. Each entry ends with a
          # literal backslash-n so that it becomes a line break once it is
          # substituted into the double-quoted payload string below.
          FAILED_JOBS=$(jq -r '
            .jobs[]
            | select(.conclusion == "failure")
            | .name
            | split(" / ")
            | if length > 2
              then ":failed: " + .[0] + " > " + .[-1]
              else ":failed: " + .[-1]
              end
            | . + "\\n"
          ' "$JOBS_JSON")
          echo "$FAILED_JOBS"

          # Export as a multi-line environment variable for the next step.
          {
            echo "FAILED_JOBS<<EOF"
            echo "$FAILED_JOBS"
            echo "EOF"
          } >> "$GITHUB_ENV"

      - name: Notify Slack
        uses: slackapi/slack-github-action@91efab103c0de0a537f72a35f6b8cda0ee76bf0a  # v2.1.1
        with:
          webhook: ${{ secrets.SLACK_NOTIFY_NIGHTLY_WEBHOOK_URL }}
          webhook-type: incoming-webhook
          # NOTE(review): a plain "@ops-support" in mrkdwn may render as text
          # without notifying the group - confirm whether a
          # <!subteam^ID> mention is needed.
          payload: |
            blocks:
              - type: "section"
                text:
                  type: mrkdwn
                  text: ":alert: *Github Nightly Pipeline Failure*"
              - type: "section"
                text:
                  type: mrkdwn
                  text: "<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|Workflow Summary>"
              - type: "section"
                text:
                  type: mrkdwn
                  text: "${{ env.FAILED_JOBS }}"
              - type: "section"
                text:
                  type: mrkdwn
                  text: "@ops-support Please investigate the failures above."