Unverified commit 21a03b31, authored by Dillon Cullinan and committed by GitHub
Browse files

ci: OPS-2134: Add framework builds to nightlies (#4435)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
Co-authored-by: Nate Mailhot <nmailhot@nvidia.com>
parent 5fe0476e
...@@ -174,7 +174,7 @@ runs: ...@@ -174,7 +174,7 @@ runs:
# Create job-specific metrics file # Create job-specific metrics file
mkdir -p build-metrics mkdir -p build-metrics
METRICS_FILE="build-metrics/metrics-${{ inputs.framework }}-${PLATFORM_ARCH}.json" METRICS_FILE="build-metrics/metrics-${{ inputs.framework }}-${PLATFORM_ARCH}-${{ github.run_id }}-${{ job.check_run_id }}.json"
# Create the job metrics file directly # Create the job metrics file directly
cat > "$METRICS_FILE" << EOF cat > "$METRICS_FILE" << EOF
...@@ -199,6 +199,6 @@ runs: ...@@ -199,6 +199,6 @@ runs:
- name: Upload Build Metrics - name: Upload Build Metrics
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-metrics-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }} name: build-metrics-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
path: build-metrics/metrics-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}.json path: build-metrics/metrics-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}.json
retention-days: 7 retention-days: 7
...@@ -17,7 +17,7 @@ jobs: ...@@ -17,7 +17,7 @@ jobs:
runs-on: ${{ matrix.platform.runner }} runs-on: ${{ matrix.platform.runner }}
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 #v4.3.1
- name: Build vLLM Docker Image - name: Build vLLM Docker Image
id: build-vllm id: build-vllm
uses: ./.github/actions/docker-build uses: ./.github/actions/docker-build
...@@ -68,3 +68,78 @@ jobs: ...@@ -68,3 +68,78 @@ jobs:
framework: "vllm" framework: "vllm"
test_type: "e2e" test_type: "e2e"
platform_arch: ${{ matrix.platform.arch }} platform_arch: ${{ matrix.platform.arch }}
####################
# Framework Builds #
####################
vllm-framework:
  # Builds the vLLM `framework` image target once per CPU architecture and
  # pushes the result under a `main-vllm-framework-<arch>` tag.
  name: "vllm-framework (${{ matrix.platform.arch }})"
  runs-on: "${{ matrix.platform.runner }}"
  env:
    FRAMEWORK: vllm
  strategy:
    # Let the other architecture finish even if one leg fails.
    fail-fast: false
    matrix:
      platform:
        - arch: amd64
          runner: cpu-amd-m5-4xlarge
        - arch: arm64
          runner: cpu-arm-r8g-4xlarge
  # The step list is anchored so the sglang-framework and trtllm-framework
  # jobs can reuse it verbatim through `*framework-build-steps`; every
  # framework-specific value is therefore read from `env.FRAMEWORK`.
  steps: &framework-build-steps
    - name: Checkout code
      uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4.3.1
      with:
        # Always build from main, regardless of the ref that triggered the run.
        ref: main

    - name: Build Image
      id: build-image
      uses: ./.github/actions/docker-build
      with:
        framework: "${{ env.FRAMEWORK }}"
        target: framework
        platform: "linux/${{ matrix.platform.arch }}"
        # The four overrides below apply only to the vllm + arm64 combination;
        # every other framework/arch pair receives an empty string.
        base_image_tag: >-
          ${{ (matrix.platform.arch == 'arm64' && env.FRAMEWORK == 'vllm')
          && '25.06-cuda12.9-devel-ubuntu24.04' || '' }}
        runtime_image_tag: >-
          ${{ (matrix.platform.arch == 'arm64' && env.FRAMEWORK == 'vllm')
          && '12.9.0-runtime-ubuntu24.04' || '' }}
        cuda_version: >-
          ${{ (matrix.platform.arch == 'arm64' && env.FRAMEWORK == 'vllm')
          && '129' || '' }}
        torch_backend: >-
          ${{ (matrix.platform.arch == 'arm64' && env.FRAMEWORK == 'vllm')
          && 'cu129' || '' }}
        # Credentials forwarded to the local docker-build action.
        ngc_ci_access_token: "${{ secrets.NGC_CI_ACCESS_TOKEN }}"
        ci_token: "${{ secrets.CI_TOKEN }}"
        aws_default_region: "${{ secrets.AWS_DEFAULT_REGION }}"
        sccache_s3_bucket: "${{ secrets.SCCACHE_S3_BUCKET }}"
        aws_account_id: "${{ secrets.AWS_ACCOUNT_ID }}"
        aws_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID }}"
        aws_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY }}"

    - name: Docker Tag and Push
      uses: ./.github/actions/docker-tag-push
      with:
        # Image tag reported by the preceding build-image step.
        local_image: "${{ steps.build-image.outputs.image_tag }}"
        push_tag: "ai-dynamo/dynamo:main-${{ env.FRAMEWORK }}-framework-${{ matrix.platform.arch }}"
        # The AWS push is enabled and the Azure push is disabled; the Azure
        # credentials are still passed through to the action.
        aws_push: 'true'
        azure_push: 'false'
        aws_account_id: "${{ secrets.AWS_ACCOUNT_ID }}"
        aws_default_region: "${{ secrets.AWS_DEFAULT_REGION }}"
        azure_acr_hostname: "${{ secrets.AZURE_ACR_HOSTNAME }}"
        azure_acr_user: "${{ secrets.AZURE_ACR_USER }}"
        azure_acr_password: "${{ secrets.AZURE_ACR_PASSWORD }}"
sglang-framework:
  # SGLang counterpart of the vllm-framework job: same two-architecture
  # matrix, with FRAMEWORK switched to sglang.
  name: "sglang-framework (${{ matrix.platform.arch }})"
  runs-on: "${{ matrix.platform.runner }}"
  env:
    FRAMEWORK: sglang
  strategy:
    # Let the other architecture finish even if one leg fails.
    fail-fast: false
    matrix:
      platform:
        - arch: amd64
          runner: cpu-amd-m5-4xlarge
        - arch: arm64
          runner: cpu-arm-r8g-4xlarge
  # Reuses the step list anchored on the vllm-framework job.
  steps: *framework-build-steps
trtllm-framework:
  # TensorRT-LLM counterpart of the vllm-framework job: same two-architecture
  # matrix, with FRAMEWORK switched to trtllm.
  name: "trtllm-framework (${{ matrix.platform.arch }})"
  runs-on: "${{ matrix.platform.runner }}"
  env:
    FRAMEWORK: trtllm
  strategy:
    # Let the other architecture finish even if one leg fails.
    fail-fast: false
    matrix:
      platform:
        - arch: amd64
          runner: cpu-amd-m5-4xlarge
        - arch: arm64
          runner: cpu-arm-r8g-4xlarge
  # Reuses the step list anchored on the vllm-framework job.
  steps: *framework-build-steps
...@@ -180,21 +180,21 @@ class BuildMetricsReader: ...@@ -180,21 +180,21 @@ class BuildMetricsReader:
# Fallback to individual file approach for backward compatibility # Fallback to individual file approach for backward compatibility
# Try framework-specific artifact (direct path) # Try framework-specific artifact (direct path)
artifact_path = f"build-metrics/metrics-{framework}-{preferred_arch}.json" artifact_path = f"build-metrics/metrics-{framework}-{preferred_arch}-*.json"
if not os.path.exists(artifact_path): if not os.path.exists(artifact_path):
# Try the other architecture (direct path) # Try the other architecture (direct path)
other_arch = "arm64" if preferred_arch == "amd64" else "amd64" other_arch = "arm64" if preferred_arch == "amd64" else "amd64"
artifact_path = f"build-metrics/metrics-{framework}-{other_arch}.json" artifact_path = f"build-metrics/metrics-{framework}-{other_arch}-*.json"
if not os.path.exists(artifact_path): if not os.path.exists(artifact_path):
# Try artifact subdirectory structure (new format) # Try artifact subdirectory structure (new format)
artifact_path = f"build-metrics/build-metrics-{framework}-{preferred_arch}/metrics-{framework}-{preferred_arch}.json" artifact_path = f"build-metrics/build-metrics-{framework}-{preferred_arch}/metrics-{framework}-{preferred_arch}-*.json"
if not os.path.exists(artifact_path): if not os.path.exists(artifact_path):
# Try other architecture in subdirectory # Try other architecture in subdirectory
other_arch = "arm64" if preferred_arch == "amd64" else "amd64" other_arch = "arm64" if preferred_arch == "amd64" else "amd64"
artifact_path = f"build-metrics/build-metrics-{framework}-{other_arch}/metrics-{framework}-{other_arch}.json" artifact_path = f"build-metrics/build-metrics-{framework}-{other_arch}/metrics-{framework}-{other_arch}-*.json"
if not os.path.exists(artifact_path): if not os.path.exists(artifact_path):
# Try old naming convention (backward compatibility) # Try old naming convention (backward compatibility)
artifact_path = f"build-metrics/metrics-{framework}.json" artifact_path = f"build-metrics/metrics-{framework}-*.json"
if not os.path.exists(artifact_path): if not os.path.exists(artifact_path):
# Try alternative path (old format) # Try alternative path (old format)
artifact_path = f"build-metrics/build-metrics-{framework}/metrics.json" artifact_path = f"build-metrics/build-metrics-{framework}/metrics.json"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment