Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
raojy
vllm_017
Commits
3b50924c
Commit
3b50924c
authored
Mar 27, 2026
by
raojy
Browse files
raw_vllm
parent
fbeb8a6f
Pipeline
#3455
canceled with stages
Changes
144
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
746 additions
and
0 deletions
+746
-0
.buildkite/.pipeline_gen_v2
.buildkite/.pipeline_gen_v2
+0
-0
.buildkite/check-wheel-size.py
.buildkite/check-wheel-size.py
+53
-0
.buildkite/ci_config.yaml
.buildkite/ci_config.yaml
+25
-0
.buildkite/hardware_tests/amd.yaml
.buildkite/hardware_tests/amd.yaml
+30
-0
.buildkite/hardware_tests/ascend_npu.yaml
.buildkite/hardware_tests/ascend_npu.yaml
+10
-0
.buildkite/hardware_tests/cpu.yaml
.buildkite/hardware_tests/cpu.yaml
+100
-0
.buildkite/hardware_tests/gh200.yaml
.buildkite/hardware_tests/gh200.yaml
+10
-0
.buildkite/hardware_tests/intel.yaml
.buildkite/hardware_tests/intel.yaml
+17
-0
.buildkite/image_build/image_build.sh
.buildkite/image_build/image_build.sh
+255
-0
.buildkite/image_build/image_build.yaml
.buildkite/image_build/image_build.yaml
+58
-0
.buildkite/image_build/image_build_cpu.sh
.buildkite/image_build/image_build_cpu.sh
+36
-0
.buildkite/image_build/image_build_cpu_arm64.sh
.buildkite/image_build/image_build_cpu_arm64.sh
+33
-0
.buildkite/image_build/image_build_hpu.sh
.buildkite/image_build/image_build_hpu.sh
+34
-0
.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml
...ldkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml
+13
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml
.../configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml
+12
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml
...te/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml
+12
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
...a-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
+12
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml
...s/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml
+12
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
...figs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
+12
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml
...lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml
+12
-0
No files found.
.buildkite/.pipeline_gen_v2
0 → 100644
View file @
3b50924c
.buildkite/check-wheel-size.py
0 → 100644
View file @
3b50924c
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import sys
import zipfile

# Maximum allowed wheel size, read from the VLLM_MAX_SIZE_MB environment
# variable with a default of 500 MiB.
# Note that we have 800 MiB quota, please use it wisely.
# See https://github.com/pypi/support/issues/6326 .
# Please also sync the value with the one in Dockerfile.
VLLM_MAX_SIZE_MB = int(os.getenv("VLLM_MAX_SIZE_MB", 500))
def print_top_10_largest_files(zip_file):
    """Print the top 10 largest files in the given zip file.

    Sizes reported are the *uncompressed* member sizes, in MiB, largest first.
    """
    with zipfile.ZipFile(zip_file, "r") as archive:
        # Pair each member name with its uncompressed size, biggest first.
        entries = sorted(
            ((name, archive.getinfo(name).file_size) for name in archive.namelist()),
            key=lambda entry: entry[1],
            reverse=True,
        )
    for name, size in entries[:10]:
        print(f"{name}: {size/(1024*1024):.2f} MBs uncompressed.")
def check_wheel_size(directory):
    """Check the size of .whl files in the given directory.

    Walks *directory* recursively; for each wheel found, compares its on-disk
    size (in MiB) against the module-level VLLM_MAX_SIZE_MB limit.

    Returns 0 when every wheel is within the limit; returns 1 on the first
    oversized wheel, after printing its ten largest members for diagnosis.
    """
    for root, _, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".whl"):
                continue
            wheel_path = os.path.join(root, file_name)
            wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
            if wheel_size_mb > VLLM_MAX_SIZE_MB:
                print(
                    f"Not allowed: Wheel {wheel_path} is larger "
                    f"({wheel_size_mb:.2f} MB) than the limit "
                    f"({VLLM_MAX_SIZE_MB} MB)."
                )
                # Show what is bloating the wheel before failing.
                print_top_10_largest_files(wheel_path)
                return 1
            print(
                f"Wheel {wheel_path} is within the allowed size "
                f"({wheel_size_mb:.2f} MB)."
            )
    return 0
if __name__ == "__main__":
    # Require exactly one positional argument: the directory to scan.
    if len(sys.argv) < 2:
        print("Usage: python check-wheel-size.py <directory>")
        sys.exit(1)
    # Exit code mirrors check_wheel_size: 0 = OK, 1 = oversized wheel found.
    sys.exit(check_wheel_size(sys.argv[1]))
.buildkite/ci_config.yaml
0 → 100644
View file @
3b50924c
# Top-level CI configuration consumed by the pipeline generator.
name: vllm_ci
# Directories scanned for job definitions.
job_dirs:
  - ".buildkite/image_build"
  - ".buildkite/test_areas"
  - ".buildkite/hardware_tests"
# Changes matching these path prefixes trigger the full pipeline.
run_all_patterns:
  - "docker/Dockerfile"
  - "CMakeLists.txt"
  - "requirements/common.txt"
  - "requirements/cuda.txt"
  - "requirements/build.txt"
  - "requirements/test.txt"
  - "setup.py"
  - "csrc/"
  - "cmake/"
# Exceptions carved out of the patterns above (platform-specific files).
run_all_exclude_patterns:
  - "docker/Dockerfile."
  - "csrc/cpu/"
  - "csrc/rocm/"
  - "cmake/hipify.py"
  - "cmake/cpu_extension.cmake"
registries: public.ecr.aws/q9t5s3a7
repositories:
  main: "vllm-ci-postmerge-repo"
  premerge: "vllm-ci-test-repo"
.buildkite/hardware_tests/amd.yaml
0 → 100644
View file @
3b50924c
# Buildkite step: build (and push) the AMD ROCm test image.
group: Hardware - AMD Build
steps:
  - label: "AMD: :docker: build image"
    key: image-build-amd
    depends_on: []
    device: amd_cpu
    no_plugin: true
    commands:
      # Folded scalar: one long `docker build` invocation.
      - >
        docker build
        --build-arg max_jobs=16
        --build-arg REMOTE_VLLM=1
        --build-arg ARG_PYTORCH_ROCM_ARCH='gfx942;gfx950'
        --build-arg VLLM_BRANCH=$BUILDKITE_COMMIT
        --tag "rocm/vllm-ci:${BUILDKITE_COMMIT}"
        -f docker/Dockerfile.rocm
        --target test
        --no-cache
        --progress plain .
      - docker push "rocm/vllm-ci:${BUILDKITE_COMMIT}"
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 1
        - exit_status: -10  # Agent was lost
          limit: 1
        - exit_status: 1  # Machine occasionally fail
          limit: 1
.buildkite/hardware_tests/ascend_npu.yaml
0 → 100644
View file @
3b50924c
# Buildkite step: Ascend NPU smoke test (non-blocking via soft_fail).
group: Hardware
depends_on: ~
steps:
  - label: "Ascend NPU Test"
    soft_fail: true
    timeout_in_minutes: 20
    no_plugin: true
    device: ascend_npu
    commands:
      - bash .buildkite/scripts/hardware_ci/run-npu-test.sh
.buildkite/hardware_tests/cpu.yaml
0 → 100644
View file @
3b50924c
# Buildkite CPU test jobs. Each step lists source_file_dependencies so the
# generator can skip it when no matching paths changed; all steps are
# soft_fail (non-blocking).
group: CPU
depends_on: []
steps:
  - label: CPU-Kernel Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/
      - cmake/cpu_extension.cmake
      - CMakeLists.txt
      - vllm/_custom_ops.py
      - tests/kernels/attention/test_cpu_attn.py
      - tests/kernels/moe/test_cpu_fused_moe.py
      - tests/kernels/test_onednn.py
    commands:
      # First arg to run-cpu-test.sh is a timeout; the quoted string is the
      # test script executed inside the container.
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
        pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
        pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
        pytest -x -v -s tests/kernels/test_onednn.py"
  - label: CPU-Language Generation and Pooling Model Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/
      - vllm/
      - tests/models/language/generation/
      - tests/models/language/pooling/
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
        pytest -x -v -s tests/models/language/generation -m cpu_model
        pytest -x -v -s tests/models/language/pooling -m cpu_model"
  - label: CPU-Quantization Model Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/
      - vllm/model_executor/layers/quantization/cpu_wna16.py
      - vllm/model_executor/layers/quantization/gptq_marlin.py
      - vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
      - vllm/model_executor/layers/quantization/kernels/scaled_mm/cpu.py
      - vllm/model_executor/layers/quantization/kernels/mixed_precision/cpu.py
      - tests/quantization/test_compressed_tensors.py
      - tests/quantization/test_cpu_wna16.py
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
        pytest -x -v -s tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs
        pytest -x -v -s tests/quantization/test_cpu_wna16.py"
  - label: CPU-Distributed Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/shm.cpp
      - vllm/v1/worker/cpu_worker.py
      - vllm/v1/worker/gpu_worker.py
      - vllm/v1/worker/cpu_model_runner.py
      - vllm/v1/worker/gpu_model_runner.py
      - vllm/platforms/cpu.py
      - vllm/distributed/parallel_state.py
      - vllm/distributed/device_communicators/cpu_communicator.py
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
        bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh"
  - label: CPU-Multi-Modal Model Tests %N
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      # - vllm/
      - vllm/model_executor/layers/rotary_embedding
      - tests/models/multimodal/generation/
    commands:
      # $$ escapes Buildkite interpolation so the agent env vars are read at
      # runtime; the step is sharded across `parallelism` jobs.
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
        pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
    parallelism: 2
  - label: "Arm CPU Test"
    depends_on: []
    soft_fail: true
    device: arm_cpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
.buildkite/hardware_tests/gh200.yaml
0 → 100644
View file @
3b50924c
# Buildkite step: optional GH200 (Grace Hopper) hardware test.
group: Hardware
steps:
  - label: "GH200 Test"
    soft_fail: true
    device: gh200
    no_plugin: true
    optional: true
    commands:
      - nvidia-smi
      - bash .buildkite/scripts/hardware_ci/run-gh200-test.sh
.buildkite/hardware_tests/intel.yaml
0 → 100644
View file @
3b50924c
# Buildkite steps: Intel HPU (Gaudi) and Intel GPU (XPU) tests, non-blocking.
group: Hardware
depends_on: ~
steps:
  - label: "Intel HPU Test"
    soft_fail: true
    device: intel_hpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-hpu-test.sh
  - label: "Intel GPU Test"
    depends_on: []
    soft_fail: true
    device: intel_gpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-xpu-test.sh
.buildkite/image_build/image_build.sh
0 → 100644
View file @
3b50924c
#!/bin/bash
set -euo pipefail

# replace invalid characters in Docker image tags and truncate to 128 chars
# (Docker tags allow only [a-zA-Z0-9._-] and max 128 characters).
clean_docker_tag() {
    local input="$1"
    echo "$input" | sed 's/[^a-zA-Z0-9._-]/_/g' | cut -c1-128
}
# Print the expected CLI arguments and abort with a non-zero exit status.
print_usage_and_exit() {
    echo "Usage: $0 <registry> <repo> <commit> <branch> <image_tag> [<image_tag_latest>]"
    exit 1
}
# Print diagnostic info about the EC2 instance running this build (AMI,
# instance type/id, AZ) via IMDSv2, plus whether a warm-cache custom AMI is
# in use. Purely informational; every lookup tolerates failure.
print_instance_info() {
    echo ""
    echo "=== Debug: Instance Information ==="
    # Get IMDSv2 token
    if TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \
        -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" 2>/dev/null); then
        AMI_ID=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
            http://169.254.169.254/latest/meta-data/ami-id 2>/dev/null || echo "unknown")
        INSTANCE_TYPE=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
            http://169.254.169.254/latest/meta-data/instance-type 2>/dev/null || echo "unknown")
        INSTANCE_ID=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
            http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null || echo "unknown")
        AZ=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
            http://169.254.169.254/latest/meta-data/placement/availability-zone 2>/dev/null || echo "unknown")
        echo "AMI ID: ${AMI_ID}"
        echo "Instance Type: ${INSTANCE_TYPE}"
        echo "Instance ID: ${INSTANCE_ID}"
        echo "AZ: ${AZ}"
    else
        echo "Not running on EC2 or IMDS not available"
    fi
    # Check for warm cache AMI (marker file baked into custom AMI)
    if [[ -f /etc/vllm-ami-info ]]; then
        echo "Cache: warm (custom vLLM AMI)"
        cat /etc/vllm-ami-info
    else
        echo "Cache: cold (standard AMI)"
    fi
    echo "==================================="
    echo ""
}
# Select or create a docker buildx builder, preferring (in order):
#   1. a local standalone buildkitd socket (warm-cache custom AMI),
#   2. an already-existing builder named ${BUILDER_NAME},
#   3. a freshly created docker-container builder.
setup_buildx_builder() {
    echo "--- :buildkite: Setting up buildx builder"
    if [[ -S "${BUILDKIT_SOCKET}" ]]; then
        # Custom AMI with standalone buildkitd - use remote driver for warm cache
        echo "✅ Found local buildkitd socket at ${BUILDKIT_SOCKET}"
        echo "Using remote driver to connect to buildkitd (warm cache available)"
        if docker buildx inspect baked-vllm-builder > /dev/null 2>&1; then
            echo "Using existing baked-vllm-builder"
            docker buildx use baked-vllm-builder
        else
            echo "Creating baked-vllm-builder with remote driver"
            docker buildx create \
                --name baked-vllm-builder \
                --driver remote \
                --use \
                "unix://${BUILDKIT_SOCKET}"
        fi
        docker buildx inspect --bootstrap
    elif docker buildx inspect "${BUILDER_NAME}" > /dev/null 2>&1; then
        # Existing builder available
        echo "Using existing builder: ${BUILDER_NAME}"
        docker buildx use "${BUILDER_NAME}"
        docker buildx inspect --bootstrap
    else
        # No local buildkitd, no existing builder - create new docker-container builder
        echo "No local buildkitd found, using docker-container driver"
        docker buildx create --name "${BUILDER_NAME}" --driver docker-container --use
        docker buildx inspect --bootstrap
    fi
    # builder info
    echo "Active builder:"
    docker buildx ls | grep -E '^\*|^NAME' || docker buildx ls
}
# If IMAGE_TAG is set and the image already exists in the registry, skip the
# build entirely by exiting 0 (idempotent re-runs of the same commit).
check_and_skip_if_image_exists() {
    if [[ -n "${IMAGE_TAG:-}" ]]; then
        echo "--- :mag: Checking if image exists"
        if docker manifest inspect "${IMAGE_TAG}" > /dev/null 2>&1; then
            echo "Image already exists: ${IMAGE_TAG}"
            echo "Skipping build"
            exit 0
        fi
        echo "Image not found, proceeding with build"
    fi
}
# Authenticate docker against both the public ECR registry ($REGISTRY) and
# the private ECR account that hosts the build-cache repositories.
ecr_login() {
    aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
    aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
}
# Resolve the buildx cache image tags based on the Buildkite context:
#   - post-merge on main        -> postmerge cache :latest
#   - post-merge on a branch    -> test cache :<sanitized-branch>
#   - pull request              -> test cache :pr-<number>, with a fallback
#                                  read from the PR's base branch cache
# resolve and set: CACHE_TO, CACHE_FROM, CACHE_FROM_BASE_BRANCH, CACHE_FROM_MAIN
prepare_cache_tags() {
    TEST_CACHE_ECR="936637512419.dkr.ecr.us-east-1.amazonaws.com/vllm-ci-test-cache"
    MAIN_CACHE_ECR="936637512419.dkr.ecr.us-east-1.amazonaws.com/vllm-ci-postmerge-cache"
    if [[ "$BUILDKITE_PULL_REQUEST" == "false" ]]; then
        # Post-merge build: read and write the same cache tag.
        if [[ "$BUILDKITE_BRANCH" == "main" ]]; then
            cache="${MAIN_CACHE_ECR}:latest"
        else
            clean_branch=$(clean_docker_tag "$BUILDKITE_BRANCH")
            cache="${TEST_CACHE_ECR}:${clean_branch}"
        fi
        CACHE_TO="$cache"
        CACHE_FROM="$cache"
        CACHE_FROM_BASE_BRANCH="$cache"
    else
        # PR build: per-PR cache tag, falling back to the base branch cache.
        CACHE_TO="${TEST_CACHE_ECR}:pr-${BUILDKITE_PULL_REQUEST}"
        CACHE_FROM="${TEST_CACHE_ECR}:pr-${BUILDKITE_PULL_REQUEST}"
        if [[ "$BUILDKITE_PULL_REQUEST_BASE_BRANCH" == "main" ]]; then
            CACHE_FROM_BASE_BRANCH="${MAIN_CACHE_ECR}:latest"
        else
            clean_base=$(clean_docker_tag "$BUILDKITE_PULL_REQUEST_BASE_BRANCH")
            CACHE_FROM_BASE_BRANCH="${TEST_CACHE_ECR}:${clean_base}"
        fi
    fi
    # The main cache is always available as a last-resort cache source.
    CACHE_FROM_MAIN="${MAIN_CACHE_ECR}:latest"
    export CACHE_TO CACHE_FROM CACHE_FROM_BASE_BRANCH CACHE_FROM_MAIN
}
# Ensure PARENT_COMMIT is set (used as a cache-fallback key). If the caller
# did not provide it, derive it from git; tolerate a missing parent (e.g.
# the first commit in the repo) by leaving it empty.
resolve_parent_commit() {
    if [[ -z "${PARENT_COMMIT:-}" ]]; then
        PARENT_COMMIT=$(git rev-parse HEAD~1 2>/dev/null || echo "")
        if [[ -n "${PARENT_COMMIT}" ]]; then
            echo "Computed parent commit for cache fallback: ${PARENT_COMMIT}"
            export PARENT_COMMIT
        else
            echo "Could not determine parent commit (may be first commit in repo)"
        fi
    else
        echo "Using provided PARENT_COMMIT: ${PARENT_COMMIT}"
    fi
}
# Dump the fully resolved `docker buildx bake` configuration for the build
# target to a JSON file and upload it as a Buildkite artifact for debugging.
print_bake_config() {
    echo "--- :page_facing_up: Resolved bake configuration"
    # Write to a temp directory to avoid polluting the repo root (which is the
    # Docker build context). Files left in the repo root get COPY'd into the
    # image and can cause duplicate artifact uploads from downstream steps.
    local bake_tmp
    bake_tmp="$(mktemp -d)"
    BAKE_CONFIG_FILE="${bake_tmp}/bake-config-build-${BUILDKITE_BUILD_NUMBER:-local}.json"
    # `|| true`: this is diagnostics only, never fail the build over it.
    docker buildx bake -f "${VLLM_BAKE_FILE_PATH}" -f "${CI_HCL_PATH}" --print "${TARGET}" | tee "${BAKE_CONFIG_FILE}" || true
    echo "Saved bake config to ${BAKE_CONFIG_FILE}"
    echo "--- :arrow_down: Uploading bake config to Buildkite"
    # cd into the temp dir so the artifact is uploaded by basename only.
    (cd "$(dirname "${BAKE_CONFIG_FILE}")" && buildkite-agent artifact upload "$(basename "${BAKE_CONFIG_FILE}")")
}
#################################
#          Main Script          #
#################################
print_instance_info

if [[ $# -lt 5 ]]; then
    print_usage_and_exit
fi

# input args
REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3
BRANCH=$4
IMAGE_TAG=$5
IMAGE_TAG_LATEST=${6:-}  # only used for main branch, optional

# build config
TARGET="test-ci"
VLLM_BAKE_FILE_PATH="${VLLM_BAKE_FILE_PATH:-docker/docker-bake.hcl}"
BUILDER_NAME="${BUILDER_NAME:-vllm-builder}"
CI_HCL_URL="${CI_HCL_URL:-https://raw.githubusercontent.com/vllm-project/ci-infra/main/docker/ci.hcl}"
CI_HCL_PATH="/tmp/ci.hcl"
BUILDKIT_SOCKET="/run/buildkit/buildkitd.sock"

prepare_cache_tags
ecr_login

# Environment info (for docs and human readers)
# VLLM_CI_BRANCH - ci-infra branch to use (default: main)
# VLLM_BAKE_FILE_PATH - Path to vLLM's bake file (default: docker/docker-bake.hcl)
# BUILDER_NAME - Name for buildx builder (default: vllm-builder)
#
# Build configuration (exported as environment variables for bake):
export BUILDKITE_COMMIT
export PARENT_COMMIT
export IMAGE_TAG
export IMAGE_TAG_LATEST
export CACHE_FROM
export CACHE_FROM_BASE_BRANCH
export CACHE_FROM_MAIN
export CACHE_TO

# print args
echo "--- :mag: Arguments"
echo "REGISTRY: ${REGISTRY}"
echo "REPO: ${REPO}"
echo "BUILDKITE_COMMIT: ${BUILDKITE_COMMIT}"
echo "BRANCH: ${BRANCH}"
echo "IMAGE_TAG: ${IMAGE_TAG}"
echo "IMAGE_TAG_LATEST: ${IMAGE_TAG_LATEST}"

# print build configuration
echo "--- :mag: Build configuration"
echo "TARGET: ${TARGET}"
echo "vLLM bake file: ${VLLM_BAKE_FILE_PATH}"
echo "BUILDER_NAME: ${BUILDER_NAME}"
echo "CI_HCL_URL: ${CI_HCL_URL}"
echo "BUILDKIT_SOCKET: ${BUILDKIT_SOCKET}"
echo "--- :mag: Cache tags"
echo "CACHE_TO: ${CACHE_TO}"
echo "CACHE_FROM: ${CACHE_FROM}"
echo "CACHE_FROM_BASE_BRANCH: ${CACHE_FROM_BASE_BRANCH}"
echo "CACHE_FROM_MAIN: ${CACHE_FROM_MAIN}"

# Exits 0 here if the image for this commit was already pushed.
check_and_skip_if_image_exists

echo "--- :docker: Setting up Docker buildx bake"
echo "Target: ${TARGET}"
echo "vLLM bake file: ${VLLM_BAKE_FILE_PATH}"
echo "CI HCL path: ${CI_HCL_PATH}"

if [[ ! -f "${VLLM_BAKE_FILE_PATH}" ]]; then
    echo "Error: vLLM bake file not found at ${VLLM_BAKE_FILE_PATH}"
    echo "Make sure you're running from the vLLM repository root"
    exit 1
fi

echo "--- :arrow_down: Downloading ci.hcl"
curl -sSfL -o "${CI_HCL_PATH}" "${CI_HCL_URL}"
echo "Downloaded to ${CI_HCL_PATH}"
if [[ ! -f "${CI_HCL_PATH}" ]]; then
    echo "Error: ci.hcl not found at ${CI_HCL_PATH}"
    exit 1
fi

setup_buildx_builder
resolve_parent_commit
export PARENT_COMMIT
print_bake_config

echo "--- :docker: Building ${TARGET}"
docker --debug buildx bake \
    -f "${VLLM_BAKE_FILE_PATH}" \
    -f "${CI_HCL_PATH}" \
    --progress plain \
    "${TARGET}"
echo "--- :white_check_mark: Build complete"
.buildkite/image_build/image_build.yaml
0 → 100644
View file @
3b50924c
# Buildkite image-build steps: main CUDA image, CPU, HPU and CPU-arm64
# variants. All retry automatically on lost agents (exit -1 / -10).
group: Abuild
steps:
  - label: ":docker: Build image"
    key: image-build
    depends_on: []
    timeout_in_minutes: 600
    commands:
      # On main, also pass the optional "latest" tag through to the script.
      - if [[ "$BUILDKITE_BRANCH" == "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG $IMAGE_TAG_LATEST; else .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG; fi
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 2
        - exit_status: -10  # Agent was lost
          limit: 2
  - label: ":docker: Build CPU image"
    key: image-build-cpu
    depends_on: []
    commands:
      - .buildkite/image_build/image_build_cpu.sh $REGISTRY $REPO $BUILDKITE_COMMIT
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 2
        - exit_status: -10  # Agent was lost
          limit: 2
  - label: ":docker: Build HPU image"
    soft_fail: true
    depends_on: []
    key: image-build-hpu
    commands:
      - .buildkite/image_build/image_build_hpu.sh $REGISTRY $REPO $BUILDKITE_COMMIT
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 2
        - exit_status: -10  # Agent was lost
          limit: 2
  - label: ":docker: Build CPU arm64 image"
    key: cpu-arm64-image-build
    depends_on: []
    optional: true
    commands:
      - .buildkite/image_build/image_build_cpu_arm64.sh $REGISTRY $REPO $BUILDKITE_COMMIT
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 2
        - exit_status: -10  # Agent was lost
          limit: 2
.buildkite/image_build/image_build_cpu.sh
0 → 100644
View file @
3b50924c
#!/bin/bash
# Build and push the x86 CPU test image tagged <registry>/<repo>:<commit>-cpu.
set -e

if [[ $# -lt 3 ]]; then
    echo "Usage: $0 <registry> <repo> <commit>"
    exit 1
fi

REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3

# authenticate with AWS ECR
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"

# skip build if image already exists
if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then
    echo "Image not found, proceeding with build..."
else
    echo "Image found"
    exit 0
fi

# build
docker build --file docker/Dockerfile.cpu \
    --build-arg max_jobs=16 \
    --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
    --build-arg VLLM_CPU_AVX512BF16=true \
    --build-arg VLLM_CPU_AVX512VNNI=true \
    --build-arg VLLM_CPU_AMXBF16=true \
    --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \
    --target vllm-test \
    --progress plain .

# push
docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu
.buildkite/image_build/image_build_cpu_arm64.sh
0 → 100644
View file @
3b50924c
#!/bin/bash
# Build and push the arm64 CPU test image tagged
# <registry>/<repo>:<commit>-arm64-cpu.
set -e

if [[ $# -lt 3 ]]; then
    echo "Usage: $0 <registry> <repo> <commit>"
    exit 1
fi

REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3

# authenticate with AWS ECR
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"

# skip build if image already exists
if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu) ]]; then
    echo "Image not found, proceeding with build..."
else
    echo "Image found"
    exit 0
fi

# build
docker build --file docker/Dockerfile.cpu \
    --build-arg max_jobs=16 \
    --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
    --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu \
    --target vllm-test \
    --progress plain .

# push
docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu
.buildkite/image_build/image_build_hpu.sh
0 → 100644
View file @
3b50924c
#!/bin/bash
# Build and push the Intel HPU (Gaudi) image tagged
# <registry>/<repo>:<commit>-hpu. The build context is the remote
# vllm-gaudi git repository rather than the local checkout.
set -e

if [[ $# -lt 3 ]]; then
    echo "Usage: $0 <registry> <repo> <commit>"
    exit 1
fi

REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3

# authenticate with AWS ECR
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"

# skip build if image already exists
if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu) ]]; then
    echo "Image not found, proceeding with build..."
else
    echo "Image found"
    exit 0
fi

# build
docker build \
    --file tests/pytorch_ci_hud_benchmark/Dockerfile.hpu \
    --build-arg max_jobs=16 \
    --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
    --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu \
    --progress plain \
    https://github.com/vllm-project/vllm-gaudi.git

# push
docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu
.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml
0 → 100644
View file @
3b50924c
# lm-eval-harness accuracy baseline for DeepSeek-V2-Lite-Chat on GSM8K.
# For vllm script, with -t option (tensor parallel size).
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m deepseek-ai/DeepSeek-V2-Lite-Chat -b "auto" -l 1000 -f 5 -t 2
model_name: "deepseek-ai/DeepSeek-V2-Lite-Chat"
tasks:
  - name: "gsm8k"
    metrics:
      - name: "exact_match,strict-match"
        value: 0.671
      - name: "exact_match,flexible-extract"
        value: 0.664
limit: 1000
num_fewshot: 5
trust_remote_code: True
\ No newline at end of file
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml
0 → 100644
View file @
3b50924c
# lm-eval-harness accuracy baseline on GSM8K.
# For hf script, without -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5
model_name: "nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform"
tasks:
  - name: "gsm8k"
    metrics:
      - name: "exact_match,strict-match"
        value: 0.905
      - name: "exact_match,flexible-extract"
        value: 0.905
limit: 1000
num_fewshot: 5
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml
0 → 100644
View file @
3b50924c
# lm-eval-harness accuracy baseline on GSM8K.
# For hf script, without -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-70B-Instruct -b 32 -l 250 -f 5
model_name: "meta-llama/Meta-Llama-3-70B-Instruct"
tasks:
  - name: "gsm8k"
    metrics:
      - name: "exact_match,strict-match"
        value: 0.892
      - name: "exact_match,flexible-extract"
        value: 0.892
limit: 250
num_fewshot: 5
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
0 → 100644
View file @
3b50924c
# lm-eval-harness accuracy baseline on GSM8K.
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors -b auto -l 1000 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors"
tasks:
  - name: "gsm8k"
    metrics:
      - name: "exact_match,strict-match"
        value: 0.752
      - name: "exact_match,flexible-extract"
        value: 0.754
limit: 1000
num_fewshot: 5
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml
0 → 100644
View file @
3b50924c
# lm-eval-harness accuracy baseline on GSM8K.
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform"
tasks:
  - name: "gsm8k"
    metrics:
      - name: "exact_match,strict-match"
        value: 0.753
      - name: "exact_match,flexible-extract"
        value: 0.753
limit: 1000
num_fewshot: 5
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
0 → 100644
View file @
3b50924c
# lm-eval-harness accuracy baseline on GSM8K.
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test -b 32 -l 1000 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test"
tasks:
  - name: "gsm8k"
    metrics:
      - name: "exact_match,strict-match"
        value: 0.755
      - name: "exact_match,flexible-extract"
        value: 0.755
limit: 1000
num_fewshot: 5
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml
0 → 100644
View file @
3b50924c
# lm-eval-harness accuracy baseline on GSM8K.
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Meta-Llama-3-8B-Instruct-FP8 -b 32 -l 250 -f 5 -t 1
model_name: "neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
tasks:
  - name: "gsm8k"
    metrics:
      - name: "exact_match,strict-match"
        value: 0.753
      - name: "exact_match,flexible-extract"
        value: 0.753
limit: 1000
num_fewshot: 5
Prev
1
2
3
4
5
…
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment