Unverified commit 3d8c497e, authored by Ran Rubin, committed by GitHub
Browse files

ci: Prod Runners (#5534)

Replacing infra for GitHub runners
parent 2a2ad756
...@@ -58,8 +58,8 @@ jobs: ...@@ -58,8 +58,8 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
platform: platform:
- { arch: amd64, runner: gpu-l40-amd64 } - { arch: amd64, runner: prod-builder-amd-v1 }
- { arch: arm64, runner: cpu-arm-r8g-4xlarge } - { arch: arm64, runner: prod-builder-arm-v1 }
runs-on: ${{ matrix.platform.runner }} runs-on: ${{ matrix.platform.runner }}
steps: steps:
- name: Checkout repository - name: Checkout repository
......
...@@ -75,7 +75,7 @@ env: ...@@ -75,7 +75,7 @@ env:
jobs: jobs:
build-amd64: build-amd64:
name: Build ${{ matrix.framework }} (amd64) name: Build ${{ matrix.framework }} (amd64)
runs-on: cpu-amd-m5-4xlarge runs-on: prod-builder-amd-v1
timeout-minutes: 120 timeout-minutes: 120
strategy: strategy:
fail-fast: false fail-fast: false
...@@ -126,7 +126,7 @@ jobs: ...@@ -126,7 +126,7 @@ jobs:
build-arm64: build-arm64:
name: Build ${{ matrix.framework }} (arm64) name: Build ${{ matrix.framework }} (arm64)
runs-on: cpu-arm-r8g-4xlarge runs-on: prod-builder-arm-v1
timeout-minutes: 120 timeout-minutes: 120
strategy: strategy:
fail-fast: false fail-fast: false
...@@ -304,9 +304,9 @@ jobs: ...@@ -304,9 +304,9 @@ jobs:
framework: [vllm, trtllm, sglang] framework: [vllm, trtllm, sglang]
arch: arch:
- arch: amd64 - arch: amd64
runner: gpu-l40-amd64 runner: prod-builder-amd-gpu-v1
- arch: arm64 - arch: arm64
runner: cpu-arm-r8g-4xlarge runner: prod-builder-arm-v1
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Check if build succeeded - name: Check if build succeeded
...@@ -437,10 +437,10 @@ jobs: ...@@ -437,10 +437,10 @@ jobs:
framework: [vllm, trtllm, sglang] framework: [vllm, trtllm, sglang]
arch: arch:
- arch: amd64 - arch: amd64
runner: gpu-l40-amd64 runner: prod-builder-amd-gpu-v1
timeout: 90 timeout: 90
- arch: arm64 - arch: arm64
runner: cpu-arm-r8g-4xlarge runner: prod-builder-arm-v1
timeout: 90 timeout: 90
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -571,10 +571,10 @@ jobs: ...@@ -571,10 +571,10 @@ jobs:
framework: [vllm, trtllm, sglang] framework: [vllm, trtllm, sglang]
arch: arch:
- arch: amd64 - arch: amd64
runner: gpu-l40-amd64 runner: prod-builder-amd-gpu-v1
timeout: 120 timeout: 120
- arch: arm64 - arch: arm64
runner: cpu-arm-r8g-4xlarge runner: prod-builder-arm-v1
timeout: 120 timeout: 120
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -707,10 +707,10 @@ jobs: ...@@ -707,10 +707,10 @@ jobs:
framework: [vllm, trtllm, sglang] framework: [vllm, trtllm, sglang]
arch: arch:
- arch: amd64 - arch: amd64
runner: gpu-l40-amd64 runner: prod-builder-amd-gpu-v1
timeout: 150 timeout: 150
- arch: arm64 - arch: arm64
runner: cpu-arm-r8g-4xlarge runner: prod-builder-arm-v1
timeout: 150 timeout: 150
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -836,7 +836,7 @@ jobs: ...@@ -836,7 +836,7 @@ jobs:
name: ${{ matrix.framework.name }}-ft-k8s name: ${{ matrix.framework.name }}-ft-k8s
needs: [build-amd64] needs: [build-amd64]
if: always() if: always()
runs-on: cpu-amd-m5-4xlarge runs-on: prod-builder-amd-v1
timeout-minutes: 60 timeout-minutes: 60
strategy: strategy:
fail-fast: false fail-fast: false
...@@ -1074,7 +1074,7 @@ jobs: ...@@ -1074,7 +1074,7 @@ jobs:
############################## SLACK NOTIFICATION ############################## ############################## SLACK NOTIFICATION ##############################
notify-slack: notify-slack:
name: Notify Slack name: Notify Slack
runs-on: cpu-amd-m5-4xlarge runs-on: prod-builder-amd-v1
if: always() && inputs.enable_slack_notification && !github.event.repository.fork if: always() && inputs.enable_slack_notification && !github.event.repository.fork
needs: results-summary needs: results-summary
permissions: permissions:
......
...@@ -63,8 +63,8 @@ jobs: ...@@ -63,8 +63,8 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
platform: platform:
- { arch: amd64, runner: cpu-amd-m5-2xlarge } - { arch: amd64, runner: prod-builder-amd-v1 }
- { arch: arm64, runner: cpu-arm-r8g-4xlarge } - { arch: arm64, runner: prod-builder-arm-v1 }
name: operator-build (${{ matrix.platform.arch }}) name: operator-build (${{ matrix.platform.arch }})
runs-on: ${{ matrix.platform.runner }} runs-on: ${{ matrix.platform.runner }}
steps: steps:
...@@ -81,7 +81,6 @@ jobs: ...@@ -81,7 +81,6 @@ jobs:
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
...@@ -141,8 +140,8 @@ jobs: ...@@ -141,8 +140,8 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
platform: platform:
- { arch: amd64, runner: gpu-l40-amd64 } - { arch: amd64, runner: prod-builder-amd-gpu-v1 }
- { arch: arm64, runner: cpu-arm-r8g-4xlarge } - { arch: arm64, runner: prod-builder-arm-v1 }
cuda_version: cuda_version:
- { major_minor: '13.0', major: '13' } - { major_minor: '13.0', major: '13' }
- { major_minor: '12.9', major: '12' } - { major_minor: '12.9', major: '12' }
...@@ -219,8 +218,8 @@ jobs: ...@@ -219,8 +218,8 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
platform: platform:
- { arch: amd64, runner: gpu-l40-amd64 } - { arch: amd64, runner: prod-builder-amd-gpu-v1 }
- { arch: arm64, runner: cpu-arm-r8g-4xlarge } - { arch: arm64, runner: prod-builder-arm-v1 }
cuda_version: cuda_version:
- { major_minor: '13.0', major: '13' } - { major_minor: '13.0', major: '13' }
- { major_minor: '12.9', major: '12' } - { major_minor: '12.9', major: '12' }
...@@ -238,8 +237,8 @@ jobs: ...@@ -238,8 +237,8 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
platform: platform:
- { arch: amd64, runner: gpu-l40-amd64 } - { arch: amd64, runner: prod-builder-amd-gpu-v1 }
- { arch: arm64, runner: cpu-arm-r8g-4xlarge } - { arch: arm64, runner: prod-builder-arm-v1 }
cuda_version: cuda_version:
- { major_minor: '13.0', major: '13' } - { major_minor: '13.0', major: '13' }
name: trtllm-build-test (cuda${{ matrix.cuda_version.major_minor}}, ${{ matrix.platform.arch }}) name: trtllm-build-test (cuda${{ matrix.cuda_version.major_minor}}, ${{ matrix.platform.arch }})
...@@ -250,7 +249,7 @@ jobs: ...@@ -250,7 +249,7 @@ jobs:
steps: *runtime-container-build-push-test steps: *runtime-container-build-push-test
deploy-operator: deploy-operator:
runs-on: cpu-amd-m5-2xlarge runs-on: prod-default-v1
if: needs.changed-files.outputs.core == 'true' if: needs.changed-files.outputs.core == 'true'
needs: [changed-files, operator, vllm, sglang, trtllm] needs: [changed-files, operator, vllm, sglang, trtllm]
env: env:
...@@ -325,7 +324,7 @@ jobs: ...@@ -325,7 +324,7 @@ jobs:
timeout 300s kubectl rollout status deployment -n $NAMESPACE --watch timeout 300s kubectl rollout status deployment -n $NAMESPACE --watch
deploy-test-vllm: deploy-test-vllm:
runs-on: cpu-amd-m5-2xlarge runs-on: prod-default-v1
# Run if push to main, or manually triggered # Run if push to main, or manually triggered
if: ( github.ref_name == 'main' || github.event.inputs.run_deploy_operator ) if: ( github.ref_name == 'main' || github.event.inputs.run_deploy_operator )
needs: [changed-files, deploy-operator, vllm] needs: [changed-files, deploy-operator, vllm]
...@@ -515,7 +514,7 @@ jobs: ...@@ -515,7 +514,7 @@ jobs:
# - Run vllm disagg_router on all commits # - Run vllm disagg_router on all commits
# - Run rest of jobs only on push to main or manual trigger # - Run rest of jobs only on push to main or manual trigger
deploy-test-vllm-disagg-router: deploy-test-vllm-disagg-router:
runs-on: cpu-amd-m5-2xlarge runs-on: prod-default-v1
if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true' if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true'
needs: [changed-files, deploy-operator, vllm] needs: [changed-files, deploy-operator, vllm]
permissions: permissions:
...@@ -535,7 +534,7 @@ jobs: ...@@ -535,7 +534,7 @@ jobs:
steps: *deploy-test-steps steps: *deploy-test-steps
deploy-test-sglang: deploy-test-sglang:
runs-on: cpu-amd-m5-2xlarge runs-on: prod-default-v1
# Run if push to main, or manually triggered # Run if push to main, or manually triggered
if: ( github.ref_name == 'main' || github.event.inputs.run_deploy_operator ) if: ( github.ref_name == 'main' || github.event.inputs.run_deploy_operator )
needs: [changed-files, deploy-operator, sglang] needs: [changed-files, deploy-operator, sglang]
...@@ -557,7 +556,7 @@ jobs: ...@@ -557,7 +556,7 @@ jobs:
steps: *deploy-test-steps steps: *deploy-test-steps
deploy-test-trtllm: deploy-test-trtllm:
runs-on: cpu-amd-m5-2xlarge runs-on: prod-default-v1
# Run if push to main, or manually triggered # Run if push to main, or manually triggered
if: ( github.ref_name == 'main' || github.event.inputs.run_deploy_operator ) if: ( github.ref_name == 'main' || github.event.inputs.run_deploy_operator )
needs: [changed-files, deploy-operator, trtllm] needs: [changed-files, deploy-operator, trtllm]
...@@ -581,7 +580,7 @@ jobs: ...@@ -581,7 +580,7 @@ jobs:
steps: *deploy-test-steps steps: *deploy-test-steps
cleanup: cleanup:
runs-on: cpu-amd-m5-2xlarge runs-on: prod-default-v1
if: always() if: always()
needs: [changed-files, deploy-operator, deploy-test-trtllm, deploy-test-sglang, deploy-test-vllm, deploy-test-vllm-disagg-router] needs: [changed-files, deploy-operator, deploy-test-trtllm, deploy-test-sglang, deploy-test-vllm, deploy-test-vllm-disagg-router]
steps: steps:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment