Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d8606012
Unverified
Commit
d8606012
authored
Mar 27, 2026
by
wenjun liu
Committed by
GitHub
Mar 26, 2026
Browse files
[CI/Build] enable Intel XPU test flow with prebuilt image (#37447)
Signed-off-by:
wendyliu235
<
wenjun.liu@intel.com
>
parent
f73bcb1c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
397 additions
and
0 deletions
+397
-0
.buildkite/ci_config_intel.yaml
.buildkite/ci_config_intel.yaml
+23
-0
.buildkite/image_build/image_build_xpu.sh
.buildkite/image_build/image_build_xpu.sh
+34
-0
.buildkite/intel_jobs/test-intel.yaml
.buildkite/intel_jobs/test-intel.yaml
+64
-0
.buildkite/scripts/hardware_ci/run-intel-test.sh
.buildkite/scripts/hardware_ci/run-intel-test.sh
+276
-0
No files found.
.buildkite/ci_config_intel.yaml
0 → 100644
View file @
d8606012
# Configuration for the Intel CI pipeline (.buildkite/ci_config_intel.yaml).
name: vllm_intel_ci

# Directories holding the job definitions; the Intel job file
# .buildkite/intel_jobs/test-intel.yaml lives here.
job_dirs:
  - ".buildkite/intel_jobs"

# Presumably: a change touching any of these paths triggers every job
# regardless of per-job source_file_dependencies — TODO confirm against the
# pipeline generator that consumes this file.
run_all_patterns:
  - "docker/Dockerfile"
  - "CMakeLists.txt"
  - "requirements/common.txt"
  - "requirements/xpu.txt"
  - "requirements/build.txt"
  - "requirements/test.txt"
  - "setup.py"
  - "csrc/"
  - "cmake/"

# Paths carved out of run_all_patterns.
# NOTE(review): if these are prefix matches, "docker/Dockerfile." also covers
# docker/Dockerfile.xpu, which image_build_xpu.sh builds from — confirm that
# edits to the XPU Dockerfile are still meant to be excluded.
run_all_exclude_patterns:
  - "docker/Dockerfile."
  - "csrc/cpu/"
  - "csrc/rocm/"
  - "cmake/hipify.py"
  - "cmake/cpu_extension.cmake"

# Same registry/repository values that test-intel.yaml passes to the image
# build script and to the test steps.
registries: public.ecr.aws/q9t5s3a7
repositories:
  main: "vllm-ci-test-repo"
  premerge: "vllm-ci-test-repo"
.buildkite/image_build/image_build_xpu.sh
0 → 100755
View file @
d8606012
#!/bin/bash
# Build and push the Intel XPU CI image, unless it is already published.
#
# Usage: image_build_xpu.sh <registry> <repo> <commit>
#   <registry>  registry host/namespace the image is pushed to
#   <repo>      repository name inside the registry
#   <commit>    commit SHA; the image is tagged "<commit>-xpu"
#
# Requires the `aws` and `docker` CLIs. Exits 0 without building when the
# tag already exists in the registry; exits non-zero on any failure.

# pipefail matters here: without it a failed `aws ... get-login-password`
# is hidden behind the exit status of `docker login`.
set -euo pipefail

if [[ $# -lt 3 ]]; then
  echo "Usage: $0 <registry> <repo> <commit>" >&2
  exit 1
fi

REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3

# Single source of truth for the image reference used by inspect/build/push.
readonly IMAGE_REF="${REGISTRY}/${REPO}:${BUILDKITE_COMMIT}-xpu"

# authenticate with AWS ECR (public registry, then the private account registry)
aws ecr-public get-login-password --region us-east-1 \
  | docker login --username AWS --password-stdin "$REGISTRY"
aws ecr get-login-password --region us-east-1 \
  | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com

# skip build if image already exists
if docker manifest inspect "$IMAGE_REF" &> /dev/null; then
  echo "Image found"
  exit 0
fi
echo "Image not found, proceeding with build..."

# build
docker build \
  --file docker/Dockerfile.xpu \
  --build-arg max_jobs=16 \
  --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
  --tag "$IMAGE_REF" \
  --progress plain .

# push
docker push "$IMAGE_REF"
.buildkite/intel_jobs/test-intel.yaml
0 → 100644
View file @
d8606012
# Intel XPU job group: one image-build step followed by two test steps that
# run inside the image built for the current commit.
group: Intel
steps:
  # Builds and pushes "<registry>/<repo>:<commit>-xpu" via image_build_xpu.sh
  # (registry, repo and commit are passed as positional arguments).
  - label: ":docker: Build XPU image"
    soft_fail: true
    depends_on: []
    key: image-build-xpu
    commands:
      - bash -lc '.buildkite/image_build/image_build_xpu.sh "public.ecr.aws/q9t5s3a7" "vllm-ci-test-repo" "$BUILDKITE_COMMIT"'
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 2
        - exit_status: -10  # Agent was lost
          limit: 2

  # Runs the offline-inference example with several engine configurations.
  # The whole command chain is handed to run-intel-test.sh as one
  # single-quoted positional argument; REGISTRY/REPO below are read by that
  # script to form the image name when IMAGE_TAG_XPU is not set.
  - label: "XPU example Test"
    depends_on:
      - image-build-xpu
    timeout_in_minutes: 30
    device: intel_gpu
    no_plugin: true
    env:
      REGISTRY: "public.ecr.aws/q9t5s3a7"
      REPO: "vllm-ci-test-repo"
    source_file_dependencies:
      - vllm/
      - .buildkite/intel_jobs/test-intel.yaml
    commands:
      - >-
        bash .buildkite/scripts/hardware_ci/run-intel-test.sh
        'pip install tblib==3.1.0 &&
        python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager &&
        python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 -O3 -cc.cudagraph_mode=NONE &&
        python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend mp &&
        python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --attention-backend=TRITON_ATTN &&
        python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --quantization fp8 &&
        python3 examples/basic/offline_inference/generate.py --model superjob/Qwen3-4B-Instruct-2507-GPTQ-Int4 --block-size 64 --enforce-eager --max-model-len 8192 &&
        python3 examples/basic/offline_inference/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 &&
        python3 examples/basic/offline_inference/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 --enable-expert-parallel'

  # Runs the v1 pytest suites from the tests/ directory; the --ignore flags
  # skip individual test files in each suite.
  - label: "XPU V1 test"
    depends_on:
      - image-build-xpu
    timeout_in_minutes: 30
    device: intel_gpu
    no_plugin: true
    env:
      REGISTRY: "public.ecr.aws/q9t5s3a7"
      REPO: "vllm-ci-test-repo"
    source_file_dependencies:
      - vllm/
      - .buildkite/intel_jobs/test-intel.yaml
    commands:
      - >-
        bash .buildkite/scripts/hardware_ci/run-intel-test.sh
        'cd tests &&
        pytest -v -s v1/core --ignore=v1/core/test_reset_prefix_cache_e2e.py --ignore=v1/core/test_scheduler_e2e.py &&
        pytest -v -s v1/engine --ignore=v1/engine/test_output_processor.py &&
        pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py &&
        pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py --ignore=v1/worker/test_worker_memory_snapshot.py &&
        pytest -v -s v1/structured_output &&
        pytest -v -s v1/test_serial_utils.py &&
        pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py --ignore=v1/spec_decode/test_acceptance_length.py &&
        pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_example_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py'
.buildkite/scripts/hardware_ci/run-intel-test.sh
0 → 100755
View file @
d8606012
#!/bin/bash
# Run a chain of test commands inside the Intel XPU docker container.
# Structured after run-amd-test.sh, with Intel-specific container setup.
#
# Where the commands come from (first match wins):
#   1) VLLM_TEST_COMMANDS env var (preferred, preserves quoting)
#   2) Positional args: if every arg is an existing *.yaml file, the
#      commands lists of those files; otherwise the args joined into one
#      command string (legacy)
#   3) intel-test.yaml located next to this script
#
# Dry run: set DRY_RUN=1 or pass --dry-run as the first argument to print
# the final command string without touching Docker.
###############################################################################
set -o pipefail

# Honour DRY_RUN from the environment, defaulting to "off".
: "${DRY_RUN:=0}"

# A leading --dry-run flag switches dry-run on and is removed from the args.
case "${1:-}" in
  --dry-run)
    DRY_RUN=1
    shift
    ;;
esac

# Export Python path
export PYTHONPATH=".."
###############################################################################
# Helper Functions
###############################################################################
#######################################
# Prune Docker images/volumes when the filesystem holding Docker's root
# directory is more than 70% full.
# Globals:   none (all working variables are local)
# Arguments: none
# Outputs:   progress messages on stdout, errors/warnings on stderr
# Returns:   0 normally; exits the script with 1 when Docker's root
#            directory cannot be determined
#######################################
cleanup_docker() {
  local docker_root disk_usage
  local -r threshold=70

  docker_root=$(docker info -f '{{.DockerRootDir}}')
  if [[ -z "$docker_root" ]]; then
    echo "Failed to determine Docker root directory." >&2
    exit 1
  fi
  echo "Docker root directory: $docker_root"

  # -P forces the POSIX one-line-per-filesystem layout, so a long device name
  # cannot wrap the record and shift the capacity out of column 5.
  disk_usage=$(df -P "$docker_root" | tail -n 1 | awk '{ sub(/%$/, "", $5); print $5 }')

  # Without this guard a non-numeric value would make the comparison below
  # error out and fall into the misleading "below threshold" branch.
  if [[ ! "$disk_usage" =~ ^[0-9]+$ ]]; then
    echo "Warning: could not determine disk usage for $docker_root; skipping Docker cleanup." >&2
    return 0
  fi

  if [[ "$disk_usage" -gt "$threshold" ]]; then
    echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
    docker image prune -f
    # The system prune only runs when the volume prune succeeded.
    docker volume prune -f && docker system prune --force --filter "until=72h" --all
    echo "Docker images and volumes cleanup completed."
  else
    echo "Disk usage is below $threshold%. No cleanup needed."
  fi
}
# Private helper: print a collected -m/-k expression ready to be spliced back
# into the command string. One pair of surrounding double quotes is removed
# first (otherwise the result would be '"expr"' with literal quote
# characters); the expression is then single-quoted when it was double-quoted,
# contains a space, or contains "(".
_quote_marker_expr() {
  local expr="$1"
  local needs_quotes=false

  if [[ "$expr" == '"'*'"' ]]; then
    expr="${expr#\"}"
    expr="${expr%\"}"
    needs_quotes=true
  fi
  if [[ "$expr" == *" "* || "$expr" == *"("* ]]; then
    needs_quotes=true
  fi

  if $needs_quotes; then
    printf "'%s'" "$expr"
  else
    printf '%s' "$expr"
  fi
}

#######################################
# Re-insert the quoting around pytest -m / -k expressions that was lost when
# a command was flattened into a plain string (e.g. `-m not slow` becomes
# `-m 'not slow'`), so the string can later be passed to `eval`.
# Arguments: $1 - command string; may span several lines
# Outputs:   the command on one line, words separated by single spaces
# Notes:     An expression that already contains a single quote is passed
#            through untouched. An expression ends at the first word that
#            looks like a shell operator, a flag, a path (contains "/"),
#            a *.py test file, or an UPPER_CASE=value assignment.
#######################################
re_quote_pytest_markers() {
  local input="$1"
  local output=""
  local collecting=false
  local marker_buf=""
  local word is_boundary

  # Drop backslash-newline continuations first (a surviving lone "\" would
  # escape the following space under eval), then flatten remaining newlines.
  local flat="${input//$'\\\n'/ }"
  flat="${flat//$'\n'/ }"

  # Split into words with globbing disabled, then restore the caller's
  # noglob setting exactly as it was.
  local restore_glob
  restore_glob="$(shopt -p -o noglob 2>/dev/null || true)"
  set -o noglob
  local -a words
  read -ra words <<< "$flat"
  eval "$restore_glob"

  for word in "${words[@]}"; do
    if ! $collecting; then
      output+="${word} "
      if [[ "$word" == "-m" || "$word" == "-k" ]]; then
        collecting=true
        marker_buf=""
      fi
      continue
    fi

    # The expression already carries single quotes: emit it as-is.
    if [[ "$word" == *"'"* ]]; then
      if [[ -n "$marker_buf" ]]; then
        output+="${marker_buf} "
        marker_buf=""
      fi
      output+="${word} "
      collecting=false
      continue
    fi

    is_boundary=false
    case "$word" in
      "&&" | "||" | ";" | "|")
        is_boundary=true
        ;;
      --*)
        is_boundary=true
        ;;
      -[a-zA-Z])
        is_boundary=true
        ;;
      */*)
        is_boundary=true
        ;;
      *.py | *.py::*)
        is_boundary=true
        ;;
      *=*)
        # Only VAR=value counts; lower-case foo=bar may be part of a filter.
        if [[ "$word" =~ ^[A-Z_][A-Z0-9_]*= ]]; then
          is_boundary=true
        fi
        ;;
    esac

    if $is_boundary; then
      # Flush the collected expression; nothing is emitted for an empty one,
      # which avoids a stray blank in the output.
      if [[ -n "$marker_buf" ]]; then
        output+="$(_quote_marker_expr "$marker_buf") "
      fi
      marker_buf=""
      output+="${word} "
      # The boundary word may itself open the next -m/-k expression.
      if [[ "$word" == "-m" || "$word" == "-k" ]]; then
        collecting=true
      else
        collecting=false
      fi
    elif [[ -n "$marker_buf" ]]; then
      marker_buf+=" ${word}"
    else
      marker_buf="${word}"
    fi
  done

  # Expression at the very end of the command.
  if $collecting && [[ -n "$marker_buf" ]]; then
    output+="$(_quote_marker_expr "$marker_buf")"
  fi

  printf '%s\n' "${output% }"
}
# Hook for Intel-specific command exclusions/overrides.
# Arguments: $1 - command string
# Outputs:   the command string, currently unchanged (placeholder)
apply_intel_test_overrides() {
  local final_cmds
  final_cmds="$1"
  echo "${final_cmds}"
}
# Succeeds when $1 names an existing regular file whose name ends in ".yaml".
# Arguments: $1 - candidate path
# Returns:   0 if it is such a file, 1 otherwise
is_yaml_file() {
  local candidate="$1"

  if [[ ! -f "$candidate" ]]; then
    return 1
  fi
  if [[ "$candidate" != *.yaml ]]; then
    return 1
  fi
  return 0
}
# Print the entries of the "commands:" list(s) in a YAML file, one per line.
# Arguments: $1 - path to the YAML file
# Outputs:   each "- item" line found after a line whose first field is
#            "commands:", with the leading dash removed
# Notes:     Line-based scan, not a YAML parser. Scanning stops at the first
#            non-indented line after "commands:" (another "commands:" line
#            keeps it going).
extract_yaml_commands() {
  local yaml_path="$1"
  awk '
    {
      if ($1 == "commands:") {
        inside = 1
        next
      }
      if (!inside) {
        next
      }
      if ($0 ~ /^[[:space:]]*-[[:space:]]/) {
        item = $0
        sub(/^[[:space:]]*-[[:space:]]/, "", item)
        print item
        next
      }
      if ($0 ~ /^[^[:space:]]/) {
        exit
      }
    }
  ' "$yaml_path"
}
###############################################################################
# Main
###############################################################################
# Image to test: IMAGE_TAG_XPU wins when set; otherwise the per-commit tag
# "<REGISTRY>/<REPO>:<BUILDKITE_COMMIT>-xpu",
# e.g. public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:${BUILDKITE_COMMIT}-xpu
default_image_name="${REGISTRY}/${REPO}:${BUILDKITE_COMMIT}-xpu"
image_name="${IMAGE_TAG_XPU:-${default_image_name}}"
# A random suffix keeps container names unique across jobs on the same commit.
container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"

# Append every command listed in YAML file $1 to the global `commands`
# string, joined with " && ". Exits 1 when the file yields no commands.
append_yaml_commands() {
  local yaml_path="$1"
  local -a yaml_cmds
  local cmd

  mapfile -t yaml_cmds < <(extract_yaml_commands "$yaml_path")
  if [[ ${#yaml_cmds[@]} -eq 0 ]]; then
    echo "Error: No commands found in ${yaml_path}" >&2
    exit 1
  fi
  for cmd in "${yaml_cmds[@]}"; do
    if [[ -z "$commands" ]]; then
      commands="${cmd}"
    else
      commands+=" && ${cmd}"
    fi
  done
}

# ---- Command source selection ----
commands=""
if [[ -n "${VLLM_TEST_COMMANDS:-}" ]]; then
  commands="${VLLM_TEST_COMMANDS}"
  echo "Commands sourced from VLLM_TEST_COMMANDS (quoting preserved)"
elif [[ $# -gt 0 ]]; then
  # Positional args are YAML inputs only if every one is an existing
  # *.yaml file; otherwise they are joined into one command string.
  all_yaml=true
  for arg in "$@"; do
    if ! is_yaml_file "$arg"; then
      all_yaml=false
      break
    fi
  done

  if $all_yaml; then
    for yaml in "$@"; do
      append_yaml_commands "$yaml"
    done
    echo "Commands sourced from YAML files: $*"
  else
    commands="$*"
    echo "Commands sourced from positional args (legacy mode)"
  fi
else
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  DEFAULT_YAML="${SCRIPT_DIR}/intel-test.yaml"
  if [[ ! -f "${DEFAULT_YAML}" ]]; then
    echo "Error: YAML file not found: ${DEFAULT_YAML}" >&2
    exit 1
  fi
  append_yaml_commands "${DEFAULT_YAML}"
  echo "Commands sourced from default YAML: ${DEFAULT_YAML}"
fi

if [[ -z "$commands" ]]; then
  echo "Error: No test commands provided." >&2
  exit 1
fi

echo "Raw commands: $commands"
commands=$(re_quote_pytest_markers "$commands")
echo "After re-quoting: $commands"
commands=$(apply_intel_test_overrides "$commands")
echo "Final commands: $commands"

# Dry-run mode prints final commands and exits before Docker.
if [[ "$DRY_RUN" == "1" ]]; then
  echo "DRY_RUN=1 set, skipping Docker execution."
  exit 0
fi

# Without IMAGE_TAG_XPU the image name is assembled from three variables;
# an empty one would yield a reference like "/:-xpu" and a cryptic Docker error.
if [[ -z "${IMAGE_TAG_XPU:-}" ]]; then
  if [[ -z "${REGISTRY:-}" || -z "${REPO:-}" || -z "${BUILDKITE_COMMIT:-}" ]]; then
    echo "Error: set IMAGE_TAG_XPU, or REGISTRY, REPO and BUILDKITE_COMMIT, to select the XPU image." >&2
    exit 1
  fi
fi

# --- Docker housekeeping ---
cleanup_docker

# --- Pull test image ---
# image_name already resolves to IMAGE_TAG_XPU when that is set.
echo "Using prebuilt XPU image: ${image_name}"
if ! docker pull "${image_name}"; then
  # Keep going only when a local copy exists; otherwise fail fast instead of
  # letting `docker run` repeat the same failing pull.
  if ! docker image inspect "${image_name}" > /dev/null 2>&1; then
    echo "Error: failed to pull XPU image: ${image_name}" >&2
    exit 1
  fi
  echo "Warning: pull failed; using local copy of ${image_name}" >&2
fi

# Best-effort cleanup on every exit path: container, image, dangling data.
remove_docker_container() {
  docker rm -f "${container_name}" || true
  docker image rm -f "${image_name}" || true
  docker system prune -f || true
}
trap remove_docker_container EXIT

# --- Single-node job ---
if [[ -z "${ZE_AFFINITY_MASK:-}" ]]; then
  echo "Warning: ZE_AFFINITY_MASK is not set. Proceeding without device affinity." >&2
fi

# Hand the token over through the environment (`-e HF_TOKEN` with no value)
# so the secret does not appear in the docker command line. Exporting the
# possibly-empty value keeps the variable defined inside the container.
export HF_TOKEN="${HF_TOKEN:-}"

# Must stay the last command: its exit status is the script's exit status.
docker run \
  --device /dev/dri:/dev/dri \
  --net=host \
  --ipc=host \
  --privileged \
  -v /dev/dri/by-path:/dev/dri/by-path \
  --entrypoint="" \
  -e HF_TOKEN \
  -e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-}" \
  -e "CMDS=${commands}" \
  --name "${container_name}" \
  "${image_name}" \
  bash -c 'set -e; echo "ZE_AFFINITY_MASK is ${ZE_AFFINITY_MASK:-}"; eval "$CMDS"'
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment