Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
25b769e5
Unverified
Commit
25b769e5
authored
Feb 16, 2026
by
Dillon Cullinan
Committed by
GitHub
Feb 16, 2026
Browse files
ci: OPS-3142: Add multi-gpu test job (#6189)
Signed-off-by:
Dillon Cullinan
<
dcullinan@nvidia.com
>
parent
1349b890
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
76 additions
and
3 deletions
+76
-3
.github/workflows/build-test-distribute-flavor-matrix.yml
.github/workflows/build-test-distribute-flavor-matrix.yml
+6
-0
.github/workflows/build-test-distribute-flavor.yml
.github/workflows/build-test-distribute-flavor.yml
+67
-3
.github/workflows/pr.yaml
.github/workflows/pr.yaml
+3
-0
No files found.
.github/workflows/build-test-distribute-flavor-matrix.yml
View file @
25b769e5
...
@@ -27,6 +27,11 @@ on:
...
@@ -27,6 +27,11 @@ on:
required
:
false
required
:
false
type
:
boolean
type
:
boolean
default
:
true
default
:
true
run_multi_gpu_tests
:
description
:
'
Whether
to
run
multi-gpu
tests'
required
:
false
type
:
boolean
default
:
false
copy_to_acr
:
copy_to_acr
:
description
:
'
Whether
to
copy
images
to
ACR'
description
:
'
Whether
to
copy
images
to
ACR'
required
:
false
required
:
false
...
@@ -117,6 +122,7 @@ jobs:
...
@@ -117,6 +122,7 @@ jobs:
builder_name
:
${{ inputs.builder_name }}
builder_name
:
${{ inputs.builder_name }}
build_image
:
${{ inputs.build_image }}
build_image
:
${{ inputs.build_image }}
run_tests
:
${{ inputs.run_tests }}
run_tests
:
${{ inputs.run_tests }}
run_multi_gpu_tests
:
${{ inputs.run_multi_gpu_tests }}
copy_to_acr
:
${{ inputs.copy_to_acr && matrix.platform == 'amd64' }}
# no reason to copy ARM images to ACR
copy_to_acr
:
${{ inputs.copy_to_acr && matrix.platform == 'amd64' }}
# no reason to copy ARM images to ACR
push_image
:
${{ inputs.push_image }}
push_image
:
${{ inputs.push_image }}
build_timeout_minutes
:
${{ inputs.build_timeout_minutes }}
build_timeout_minutes
:
${{ inputs.build_timeout_minutes }}
...
...
.github/workflows/build-test-distribute-flavor.yml
View file @
25b769e5
...
@@ -27,6 +27,11 @@ on:
...
@@ -27,6 +27,11 @@ on:
required
:
false
required
:
false
type
:
boolean
type
:
boolean
default
:
true
default
:
true
run_multi_gpu_tests
:
description
:
'
Whether
to
run
multi-gpu
tests'
required
:
false
type
:
boolean
default
:
false
copy_to_acr
:
copy_to_acr
:
description
:
'
Whether
to
copy
images
to
ACR'
description
:
'
Whether
to
copy
images
to
ACR'
required
:
false
required
:
false
...
@@ -322,9 +327,68 @@ jobs:
...
@@ -322,9 +327,68 @@ jobs:
parallel_mode
:
'
none'
parallel_mode
:
'
none'
dind_as_sidecar
:
'
true'
dind_as_sidecar
:
'
true'
# ============================================================================
# ============================================================================
# COPY TO ACR
# MULTI-GPU TESTS
# ============================================================================
# ============================================================================
multi-gpu-test
:
# Multi-GPU support limited to AMD64 only
if
:
|
inputs.run_multi_gpu_tests &&
inputs.build_image &&
( inputs.platform != 'arm64' )
needs
:
[
build
]
name
:
Multi-gpu Test ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
runs-on
:
prod-tester-amd-gpu-4-v1
env
:
FRAMEWORK
:
${{ inputs.framework }}
steps
:
-
name
:
Checkout repository
uses
:
actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
# v4.3.0
-
name
:
Calculate target tag
id
:
calculate-target-tag
shell
:
bash
run
:
|
CUDA_VERSION_RAW=${{ inputs.cuda_version }}
CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }}
echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT
-
name
:
Docker Login
uses
:
./.github/actions/docker-login
with
:
aws_default_region
:
${{ secrets.AWS_DEFAULT_REGION }}
aws_account_id
:
${{ secrets.AWS_ACCOUNT_ID }}
azure_acr_hostname
:
${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user
:
${{ secrets.AZURE_ACR_USER }}
azure_acr_password
:
${{ secrets.AZURE_ACR_PASSWORD }}
-
name
:
Pull relevant images
shell
:
bash
run
:
|
start_time=$(date +%s)
docker pull ${{ steps.calculate-target-tag.outputs.test_image }}
docker pull quay.io/minio/minio
end_time=$(date +%s)
duration=$((end_time - start_time))
echo "⏱️ Image pull duration: ${duration}s"
# Run GPU tests sequentially (only on amd64 runners with GPU)
# These are e2e tests marked with gpu_2 or gpu_4 that require GPU hardware
-
name
:
Run GPU tests (sequential)
uses
:
./.github/actions/pytest
with
:
image_tag
:
${{ steps.calculate-target-tag.outputs.test_image }}
pytest_marks
:
'
(gpu_2
or
gpu_4)
and
pre_merge'
framework
:
${{ inputs.framework }}
test_type
:
"
pre_merge_gpu"
platform_arch
:
${{ inputs.platform }}
enable_mypy
:
'
false'
# already covered by CPU tests
hf_token
:
${{ secrets.HF_TOKEN }}
parallel_mode
:
'
none'
dind_as_sidecar
:
'
true'
# ============================================================================
# COPY TO ACR
# ============================================================================
copy-to-acr
:
copy-to-acr
:
needs
:
[
build
,
test
]
needs
:
[
build
,
test
]
# Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped)
# Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped)
...
...
.github/workflows/pr.yaml
View file @
25b769e5
...
@@ -184,6 +184,7 @@ jobs:
...
@@ -184,6 +184,7 @@ jobs:
${{ github.ref_name == 'main' && 'main-vllm' || '' }}
${{ github.ref_name == 'main' && 'main-vllm' || '' }}
${{ github.ref_name == 'main' && format('main-vllm-{0}', github.sha) || '' }}
${{ github.ref_name == 'main' && format('main-vllm-{0}', github.sha) || '' }}
builder_name
:
${{ needs.changed-files.outputs.builder_name }}
builder_name
:
${{ needs.changed-files.outputs.builder_name }}
run_multi_gpu_tests
:
true
test_gpu_timeout_minutes
:
35
test_gpu_timeout_minutes
:
35
build_timeout_minutes
:
${{ github.ref_name == 'main' && 120 || 60 }}
build_timeout_minutes
:
${{ github.ref_name == 'main' && 120 || 60 }}
secrets
:
inherit
secrets
:
inherit
...
@@ -204,6 +205,7 @@ jobs:
...
@@ -204,6 +205,7 @@ jobs:
${{ github.ref_name == 'main' && 'main-sglang' || '' }}
${{ github.ref_name == 'main' && 'main-sglang' || '' }}
${{ github.ref_name == 'main' && format('main-sglang-{0}', github.sha) || '' }}
${{ github.ref_name == 'main' && format('main-sglang-{0}', github.sha) || '' }}
builder_name
:
${{ needs.changed-files.outputs.builder_name }}
builder_name
:
${{ needs.changed-files.outputs.builder_name }}
run_multi_gpu_tests
:
true
build_timeout_minutes
:
${{ github.ref_name == 'main' && 120 || 60 }}
build_timeout_minutes
:
${{ github.ref_name == 'main' && 120 || 60 }}
secrets
:
inherit
secrets
:
inherit
...
@@ -223,6 +225,7 @@ jobs:
...
@@ -223,6 +225,7 @@ jobs:
${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
builder_name
:
${{ needs.changed-files.outputs.builder_name }}
builder_name
:
${{ needs.changed-files.outputs.builder_name }}
run_multi_gpu_tests
:
true
build_timeout_minutes
:
${{ github.ref_name == 'main' && 120 || 60 }}
build_timeout_minutes
:
${{ github.ref_name == 'main' && 120 || 60 }}
secrets
:
inherit
secrets
:
inherit
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment