Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b569620f
Unverified
Commit
b569620f
authored
Apr 14, 2026
by
Andrey Talman
Committed by
GitHub
Apr 14, 2026
Browse files
[CI] Add PyTorch nightly build and test pipeline (#37226)
Signed-off-by: atalman <atalman@fb.com>
parent
65b98089
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
83 additions
and
9 deletions
+83
-9
.buildkite/image_build/image_build_torch_nightly.sh
.buildkite/image_build/image_build_torch_nightly.sh
+68
-0
.buildkite/test_areas/models_basic.yaml
.buildkite/test_areas/models_basic.yaml
+7
-4
.buildkite/test_areas/models_language.yaml
.buildkite/test_areas/models_language.yaml
+8
-5
No files found.
.buildkite/image_build/image_build_torch_nightly.sh
0 → 100755
View file @
b569620f
#!/bin/bash
#
# Build a vLLM test image with PyTorch nightly installed.
# Called by the pipeline generator's "vLLM Against PyTorch Nightly" group.
#
# Usage:
#   image_build_torch_nightly.sh <registry> <repo> <commit> <branch> <image_tag>
#
# Arguments:
#   $1 registry   - registry passed to `docker login` with the ECR public token
#   $2 repo       - only logged by this script
#   $3 commit     - forwarded to the Docker build as the buildkite_commit build-arg
#   $4 branch     - only logged by this script
#   $5 image_tag  - tag that is checked for existence, then built and pushed
#
# Exit status:
#   0 when the image already exists or was built and pushed,
#   1 on wrong usage; any failing command aborts the script (set -e).
set -euo pipefail

if [[ $# -lt 5 ]]; then
  # Usage errors are diagnostics, so they go to stderr.
  echo "Usage: $0 <registry> <repo> <commit> <branch> <image_tag>" >&2
  exit 1
fi

REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3
BRANCH=$4
IMAGE_TAG=$5

# Name of the buildx builder instance this script creates or reuses.
readonly BUILDER_NAME="vllm-builder"
# Private ECR registry that the script additionally logs in to.
readonly PRIVATE_ECR_REGISTRY="936637512419.dkr.ecr.us-east-1.amazonaws.com"

# --- Arguments ---
echo "--- :mag: Arguments"
echo "REGISTRY: ${REGISTRY}"
echo "REPO: ${REPO}"
echo "BUILDKITE_COMMIT: ${BUILDKITE_COMMIT}"
echo "BRANCH: ${BRANCH}"
echo "IMAGE_TAG: ${IMAGE_TAG}"

# --- ECR login ---
# Passwords are piped via stdin so they never appear in argv.
echo "--- :key: ECR login"
aws ecr-public get-login-password --region us-east-1 \
  | docker login --username AWS --password-stdin "$REGISTRY"
aws ecr get-login-password --region us-east-1 \
  | docker login --username AWS --password-stdin "$PRIVATE_ECR_REGISTRY"

# --- Set up buildx ---
echo "--- :docker: Setting up buildx"
# `create` fails when the builder already exists; in that case `--use` is not
# applied, so select the existing builder explicitly. If the builder cannot be
# selected either, `set -e` aborts here instead of failing later in the build.
docker buildx create --name "$BUILDER_NAME" --driver docker-container --use \
  || docker buildx use "$BUILDER_NAME"
docker buildx inspect --bootstrap
docker buildx ls

# --- Skip if image already exists ---
echo "--- :mag: Checking if image already exists"
# Any inspect failure is treated as "image not found" and triggers a build.
if docker manifest inspect "$IMAGE_TAG" > /dev/null 2>&1; then
  echo "Image found: $IMAGE_TAG — skipping build"
  exit 0
fi
echo "Image not found, proceeding with build..."

# --- CUDA 13.0 for nightly builds ---
# Nightly CI uses CUDA 13.0 while regular CI stays on CUDA 12.9
NIGHTLY_CUDA_VERSION="13.0.0"
NIGHTLY_BUILD_BASE_IMAGE="nvidia/cuda:${NIGHTLY_CUDA_VERSION}-devel-ubuntu22.04"
NIGHTLY_FINAL_BASE_IMAGE="nvidia/cuda:${NIGHTLY_CUDA_VERSION}-base-ubuntu22.04"

echo "--- :docker: Building torch nightly image (CUDA ${NIGHTLY_CUDA_VERSION})"
# Builds the `test` target of docker/Dockerfile from the current directory and
# pushes the result directly to the registry under IMAGE_TAG.
docker buildx build --file docker/Dockerfile \
  --build-arg max_jobs=16 \
  --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
  --build-arg USE_SCCACHE=1 \
  --build-arg PYTORCH_NIGHTLY=1 \
  --build-arg CUDA_VERSION="${NIGHTLY_CUDA_VERSION}" \
  --build-arg BUILD_BASE_IMAGE="${NIGHTLY_BUILD_BASE_IMAGE}" \
  --build-arg FINAL_BASE_IMAGE="${NIGHTLY_FINAL_BASE_IMAGE}" \
  --build-arg torch_cuda_arch_list="8.0 8.9 9.0 10.0 12.0" \
  --tag "$IMAGE_TAG" \
  --push \
  --target test \
  --progress plain .

echo "--- :white_check_mark: Torch nightly image build complete: $IMAGE_TAG"
.buildkite/test_areas/models_basic.yaml
View file @
b569620f
...
...
@@ -13,10 +13,11 @@ steps:
commands
:
# Run a subset of model initialization tests
-
pytest -v -s models/test_initialization.py::test_can_initialize_small_subset
mirror
:
torch_nightly
:
{}
-
label
:
Basic Models Tests (Extra Initialization) %N
timeout_in_minutes
:
45
torch_nightly
:
true
source_file_dependencies
:
-
vllm/model_executor/models/
-
tests/models/test_initialization.py
...
...
@@ -27,6 +28,8 @@ steps:
# test.) Also run if model initialization test file is modified
-
pytest -v -s models/test_initialization.py -k 'not test_can_initialize_small_subset' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
parallelism
:
2
mirror
:
torch_nightly
:
{}
-
label
:
Basic Models Tests (Other)
timeout_in_minutes
:
45
...
...
.buildkite/test_areas/models_language.yaml
View file @
b569620f
...
...
@@ -4,7 +4,6 @@ depends_on:
steps
:
-
label
:
Language Models Tests (Standard)
timeout_in_minutes
:
25
torch_nightly
:
true
source_file_dependencies
:
-
vllm/
-
tests/models/language
...
...
@@ -12,10 +11,11 @@ steps:
# Test standard language models, excluding a subset of slow tests
-
pip freeze | grep -E 'torch'
-
pytest -v -s models/language -m 'core_model and (not slow_test)'
mirror
:
torch_nightly
:
{}
-
label
:
Language Models Tests (Extra Standard) %N
timeout_in_minutes
:
45
torch_nightly
:
true
source_file_dependencies
:
-
vllm/model_executor/models/
-
tests/models/language/pooling/test_embedding.py
...
...
@@ -27,10 +27,11 @@ steps:
-
pip freeze | grep -E 'torch'
-
pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
parallelism
:
2
mirror
:
torch_nightly
:
{}
-
label
:
Language Models Tests (Hybrid) %N
timeout_in_minutes
:
75
torch_nightly
:
true
source_file_dependencies
:
-
vllm/
-
tests/models/language/generation
...
...
@@ -42,6 +43,8 @@ steps:
# Shard hybrid language model tests
-
pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
parallelism
:
2
mirror
:
torch_nightly
:
{}
-
label
:
Language Models Test (Extended Generation)
# 80min
timeout_in_minutes
:
110
...
...
@@ -62,7 +65,7 @@ steps:
-
image-build-amd
commands
:
-
uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
-
uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.
5.2
'
-
uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.
6.0
'
-
pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
-
label
:
Language Models Test (PPL)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment