Unverified commit 84b5e9b5, authored by Dmitry Tokarev, committed by GitHub
Browse files

fix: trtllm builds in ci-test-suite.yml (#5892)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
parent 7e970d44
......@@ -69,7 +69,6 @@ defaults:
env:
REGISTRY_IMAGE: ai-dynamo/dynamo
IMAGE_PREFIX: ${{ inputs.image_prefix }}
############################## BUILD JOBS ##############################
jobs:
......@@ -80,7 +79,7 @@ jobs:
strategy:
fail-fast: false
matrix:
framework: [vllm, trtllm, sglang]
framework: [vllm, sglang]
env:
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
steps:
......@@ -136,10 +135,6 @@ jobs:
base_image_tag: '25.06-cuda12.9-devel-ubuntu24.04'
runtime_image_tag: '12.9.0-runtime-ubuntu24.04'
cuda_version: '12.9'
- framework: trtllm
base_image_tag: '25.06-py3'
runtime_image_tag: ''
cuda_version: '12.9'
- framework: sglang
base_image_tag: ''
runtime_image_tag: ''
......@@ -195,7 +190,7 @@ jobs:
strategy:
fail-fast: false
matrix:
framework: [vllm, sglang]
framework: [vllm, trtllm, sglang]
env:
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
steps:
......@@ -218,7 +213,7 @@ jobs:
platform: linux/amd64
base_image_tag: ''
runtime_image_tag: ''
cuda_version: '13.0'
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
......@@ -233,6 +228,7 @@ jobs:
push_tags: |
${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-amd64
${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-amd64-run-${{ github.run_id }}
${{ matrix.framework == 'trtllm' && format('{0}:{1}-{2}-amd64', env.REGISTRY_IMAGE, env.IMAGE_PREFIX, matrix.framework) || '' }}
aws_push: 'true'
azure_push: 'true'
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
......@@ -246,7 +242,7 @@ jobs:
strategy:
fail-fast: false
matrix:
framework: [vllm, sglang]
framework: [vllm, trtllm, sglang]
env:
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
steps:
......@@ -269,7 +265,7 @@ jobs:
platform: linux/arm64
base_image_tag: ''
runtime_image_tag: ''
cuda_version: '13.0'
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
......@@ -284,6 +280,7 @@ jobs:
push_tags: |
${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-arm64
${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-arm64-run-${{ github.run_id }}
${{ matrix.framework == 'trtllm' && format('{0}:{1}-{2}-arm64', env.REGISTRY_IMAGE, env.IMAGE_PREFIX, matrix.framework) || '' }}
aws_push: 'true'
azure_push: 'true'
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
......@@ -294,7 +291,7 @@ jobs:
unit-tests:
name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-unit
needs: [build-amd64, build-arm64]
needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
if: always() && inputs.skip_tests != true
runs-on: ${{ matrix.arch.runner }}
timeout-minutes: 45
......@@ -323,7 +320,7 @@ jobs:
echo "Repository: ${{ github.repository }}"
echo "Run ID: ${{ github.run_id }}"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} (${{ matrix.arch.arch }})"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
# Query GitHub API for job status
......@@ -427,7 +424,7 @@ jobs:
integration-tests:
name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-integ
needs: [build-amd64, build-arm64]
needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
if: always()
runs-on: ${{ matrix.arch.runner }}
timeout-minutes: ${{ matrix.arch.timeout }}
......@@ -458,7 +455,7 @@ jobs:
echo "Repository: ${{ github.repository }}"
echo "Run ID: ${{ github.run_id }}"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} (${{ matrix.arch.arch }})"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
# Query GitHub API for job status
......@@ -561,7 +558,7 @@ jobs:
e2e-single-gpu-tests:
name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-1gpu-e2e
needs: [build-amd64, build-arm64]
needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
if: always()
runs-on: ${{ matrix.arch.runner }}
timeout-minutes: ${{ matrix.arch.timeout }}
......@@ -594,7 +591,7 @@ jobs:
echo "Repository: ${{ github.repository }}"
echo "Run ID: ${{ github.run_id }}"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} (${{ matrix.arch.arch }})"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
# Query GitHub API for job status
......@@ -697,7 +694,7 @@ jobs:
e2e-multi-gpu-tests:
name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-2gpu-e2e
needs: [build-amd64, build-arm64]
needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
if: always()
runs-on: ${{ matrix.arch.runner }}
timeout-minutes: ${{ matrix.arch.timeout }}
......@@ -730,7 +727,7 @@ jobs:
echo "Repository: ${{ github.repository }}"
echo "Run ID: ${{ github.run_id }}"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} (${{ matrix.arch.arch }})"
BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
# Query GitHub API for job status
......
......@@ -240,7 +240,7 @@ jobs:
- { arch: amd64, runner: prod-builder-amd-gpu-v1 }
- { arch: arm64, runner: prod-builder-arm-v1 }
cuda_version:
- { major_minor: '13.0', major: '13' }
- { major_minor: '13.1', major: '13' }
name: trtllm-build-test (cuda${{ matrix.cuda_version.major_minor}}, ${{ matrix.platform.arch }})
runs-on: ${{ matrix.platform.runner }}
timeout-minutes: 90
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment