Commit 3b50924c authored by raojy's avatar raojy
Browse files

raw_vllm

parent fbeb8a6f
Pipeline #3455 canceled with stages
#!/bin/bash
set -ex
# Get release version, default to 1.0.0.dev for nightly/per-commit builds
RELEASE_VERSION=$(buildkite-agent meta-data get release-version 2>/dev/null | sed 's/^v//')
if [ -z "${RELEASE_VERSION}" ]; then
RELEASE_VERSION="1.0.0.dev"
fi
buildkite-agent annotate --style 'info' --context 'release-workflow' << EOF
To download the wheel (by commit):
\`\`\`
aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}-cp38-abi3-manylinux_2_31_x86_64.whl .
aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}-cp38-abi3-manylinux_2_31_aarch64.whl .
(Optional) For CUDA 13.0:
aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cu130-cp38-abi3-manylinux_2_35_x86_64.whl .
aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cu130-cp38-abi3-manylinux_2_35_aarch64.whl .
(Optional) For CPU:
aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl .
aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl .
\`\`\`
To download and upload the image:
\`\`\`
# Download images:
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64-cu130
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64-cu130
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm-base
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm
docker pull public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION}
docker pull public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION}
# Tag and push images:
## CUDA
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64 vllm/vllm-openai:x86_64
docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:latest-x86_64
docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
docker push vllm/vllm-openai:latest-x86_64
docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64-cu130 vllm/vllm-openai:x86_64-cu130
docker tag vllm/vllm-openai:x86_64-cu130 vllm/vllm-openai:latest-x86_64-cu130
docker tag vllm/vllm-openai:x86_64-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu130
docker push vllm/vllm-openai:latest-x86_64-cu130
docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu130
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64 vllm/vllm-openai:aarch64
docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:latest-aarch64
docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
docker push vllm/vllm-openai:latest-aarch64
docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64-cu130 vllm/vllm-openai:aarch64-cu130
docker tag vllm/vllm-openai:aarch64-cu130 vllm/vllm-openai:latest-aarch64-cu130
docker tag vllm/vllm-openai:aarch64-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu130
docker push vllm/vllm-openai:latest-aarch64-cu130
docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu130
## ROCm
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:latest
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:v${RELEASE_VERSION}
docker push vllm/vllm-openai-rocm:latest
docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm-base vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:latest-base
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
docker push vllm/vllm-openai-rocm:latest-base
docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
## CPU
docker tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:x86_64
docker tag vllm/vllm-openai-cpu:x86_64 vllm/vllm-openai-cpu:latest-x86_64
docker tag vllm/vllm-openai-cpu:x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64
docker push vllm/vllm-openai-cpu:latest-x86_64
docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64
docker tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:arm64
docker tag vllm/vllm-openai-cpu:arm64 vllm/vllm-openai-cpu:latest-arm64
docker tag vllm/vllm-openai-cpu:arm64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
docker push vllm/vllm-openai-cpu:latest-arm64
docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
# Create multi-arch manifest:
docker manifest rm vllm/vllm-openai:latest
docker manifest create vllm/vllm-openai:latest vllm/vllm-openai:latest-x86_64 vllm/vllm-openai:latest-aarch64
docker manifest create vllm/vllm-openai:v${RELEASE_VERSION} vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
docker manifest push vllm/vllm-openai:latest
docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}
docker manifest rm vllm/vllm-openai:latest-cu130
docker manifest create vllm/vllm-openai:latest-cu130 vllm/vllm-openai:latest-x86_64-cu130 vllm/vllm-openai:latest-aarch64-cu130
docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu130
docker manifest push vllm/vllm-openai:latest-cu130
docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-cu130
docker manifest rm vllm/vllm-openai-cpu:latest || true
docker manifest create vllm/vllm-openai-cpu:latest vllm/vllm-openai-cpu:latest-x86_64 vllm/vllm-openai-cpu:latest-arm64
docker manifest create vllm/vllm-openai-cpu:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
docker manifest push vllm/vllm-openai-cpu:latest
docker manifest push vllm/vllm-openai-cpu:v${RELEASE_VERSION}
\`\`\`
EOF
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Generate Buildkite annotation for ROCm wheel release
set -ex
# Get build configuration from meta-data
# Extract ROCm version dynamically from Dockerfile.rocm_base
# BASE_IMAGE format: rocm/dev-ubuntu-22.04:7.0-complete -> extracts "7.0"
ROCM_VERSION=$(grep -E '^ARG BASE_IMAGE=' docker/Dockerfile.rocm_base | sed -E 's/.*:([0-9]+\.[0-9]+).*/\1/' || echo "unknown")
PYTHON_VERSION=$(buildkite-agent meta-data get rocm-python-version 2>/dev/null || echo "3.12")
PYTORCH_ROCM_ARCH=$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo "gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")
# TODO: Enable the nightly build for ROCm
# Get release version, default to 1.0.0.dev for nightly/per-commit builds
RELEASE_VERSION=$(buildkite-agent meta-data get release-version 2>/dev/null || echo "")
if [ -z "${RELEASE_VERSION}" ]; then
RELEASE_VERSION="1.0.0.dev"
fi
# S3 URLs
S3_BUCKET="${S3_BUCKET:-vllm-wheels}"
S3_REGION="${AWS_DEFAULT_REGION:-us-west-2}"
S3_URL="http://${S3_BUCKET}.s3-website-${S3_REGION}.amazonaws.com"
# Format ROCm version for path (e.g., "7.1" -> "rocm710")
ROCM_VERSION_PATH="rocm$(echo "${ROCM_VERSION}" | tr -d '.')"
ROCM_PATH="rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}"
buildkite-agent annotate --style 'success' --context 'rocm-release-workflow' << EOF
## ROCm Wheel and Docker Image Releases
### Build Configuration
| Setting | Value |
|---------|-------|
| **ROCm Version** | ${ROCM_VERSION} |
| **Python Version** | ${PYTHON_VERSION} |
| **GPU Architectures** | ${PYTORCH_ROCM_ARCH} |
| **Branch** | \`${BUILDKITE_BRANCH}\` |
| **Commit** | \`${BUILDKITE_COMMIT}\` |
### :package: Installation
**Install from this build (by commit):**
\`\`\`bash
pip install vllm --extra-index-url ${S3_URL}/${ROCM_PATH}/ --trusted-host ${S3_BUCKET}.s3-website-${S3_REGION}.amazonaws.com
# Example for ROCm ${ROCM_VERSION}:
pip install vllm --extra-index-url ${S3_URL}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/ --trusted-host ${S3_BUCKET}.s3-website-${S3_REGION}.amazonaws.com
\`\`\`
**Install from nightly (if published):**
\`\`\`bash
pip install vllm --extra-index-url ${S3_URL}/rocm/nightly/ --trusted-host ${S3_BUCKET}.s3-website-${S3_REGION}.amazonaws.com
\`\`\`
### :floppy_disk: Download Wheels Directly
\`\`\`bash
# List all ROCm wheels
aws s3 ls s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/
# Download specific wheels
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/vllm-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/torch-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/triton-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/triton-kernels-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/torchvision-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/torchaudio-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/amdsmi-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/amd_aiter-*.whl .
aws s3 cp s3://${S3_BUCKET}/rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}/flash-attn-*.whl .
\`\`\`
### :gear: Included Packages
- **vllm**: vLLM with ROCm support
- **torch**: PyTorch built for ROCm ${ROCM_VERSION}
- **triton**: Triton
- **triton-kernels**: Triton kernels
- **torchvision**: TorchVision for ROCm PyTorch
- **torchaudio**: Torchaudio for ROCm PyTorch
- **amdsmi**: AMD SMI Python bindings
- **amd_aiter**: Aiter for ROCm
- **flash-attn**: Flash Attention for ROCm
### :warning: Notes
- These wheels are built for **ROCm ${ROCM_VERSION}** and will NOT work with CUDA GPUs
- Supported GPU architectures: ${PYTORCH_ROCM_ARCH}
- Platform: Linux x86_64 only
### :package: Docker Image Release
To download and upload the image:
\`\`\`
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm-base
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm-base vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:latest-base
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
docker push vllm/vllm-openai-rocm:latest-base
docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:latest
docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:v${RELEASE_VERSION}
docker push vllm/vllm-openai-rocm:latest
docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}
\`\`\`
EOF
#!/usr/bin/env bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Cache helper for ROCm base wheels
#
# This script manages caching of pre-built ROCm base wheels (torch, triton, etc.)
# to avoid rebuilding them when Dockerfile.rocm_base hasn't changed.
#
# Usage:
# cache-rocm-base-wheels.sh check - Check if cache exists, outputs "hit" or "miss"
# cache-rocm-base-wheels.sh upload - Upload wheels to cache
# cache-rocm-base-wheels.sh download - Download wheels from cache
# cache-rocm-base-wheels.sh key - Output the cache key
#
# Environment variables:
# S3_BUCKET - S3 bucket name (default: vllm-wheels)
# PYTHON_VERSION - Python version (affects cache key)
# PYTORCH_ROCM_ARCH - GPU architectures (affects cache key)
#
# Note: ROCm version is determined by BASE_IMAGE in Dockerfile.rocm_base,
# so changes to ROCm version are captured by the Dockerfile hash.
set -euo pipefail
BUCKET="${S3_BUCKET:-vllm-wheels}"
DOCKERFILE="docker/Dockerfile.rocm_base"
CACHE_PREFIX="rocm/cache"
# Generate hash from Dockerfile content + build args
generate_cache_key() {
# Include Dockerfile content
if [[ ! -f "$DOCKERFILE" ]]; then
echo "ERROR: Dockerfile not found: $DOCKERFILE" >&2
exit 1
fi
local dockerfile_hash=$(sha256sum "$DOCKERFILE" | cut -c1-16)
# Include key build args that affect the output
# These should match the ARGs in Dockerfile.rocm_base that change the build output
# Note: ROCm version is determined by BASE_IMAGE in the Dockerfile, so it's captured by dockerfile_hash
local args_string="${PYTHON_VERSION:-}|${PYTORCH_ROCM_ARCH:-}"
local args_hash=$(echo "$args_string" | sha256sum | cut -c1-8)
echo "${dockerfile_hash}-${args_hash}"
}
CACHE_KEY=$(generate_cache_key)
CACHE_PATH="s3://${BUCKET}/${CACHE_PREFIX}/${CACHE_KEY}/"
case "${1:-}" in
check)
echo "Checking cache for key: ${CACHE_KEY}" >&2
echo "Cache path: ${CACHE_PATH}" >&2
echo "Variables used in cache key:" >&2
echo " PYTHON_VERSION: ${PYTHON_VERSION:-<not set>}" >&2
echo " PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH:-<not set>}" >&2
# Check if cache exists by listing objects
# We look for at least one .whl file
echo "Running: aws s3 ls ${CACHE_PATH}" >&2
S3_OUTPUT=$(aws s3 ls "${CACHE_PATH}" 2>&1) || true
echo "S3 ls output:" >&2
echo "$S3_OUTPUT" | head -5 >&2
if echo "$S3_OUTPUT" | grep -q "\.whl"; then
echo "hit"
else
echo "miss"
fi
;;
upload)
echo "========================================"
echo "Uploading wheels to cache"
echo "========================================"
echo "Cache key: ${CACHE_KEY}"
echo "Cache path: ${CACHE_PATH}"
echo ""
if [[ ! -d "artifacts/rocm-base-wheels" ]]; then
echo "ERROR: artifacts/rocm-base-wheels directory not found" >&2
exit 1
fi
WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l)
if [[ "$WHEEL_COUNT" -eq 0 ]]; then
echo "ERROR: No wheels found in artifacts/rocm-base-wheels/" >&2
exit 1
fi
echo "Uploading $WHEEL_COUNT wheels..."
aws s3 cp --recursive artifacts/rocm-base-wheels/ "${CACHE_PATH}"
echo ""
echo "Cache upload complete!"
echo "========================================"
;;
download)
echo "========================================"
echo "Downloading wheels from cache"
echo "========================================"
echo "Cache key: ${CACHE_KEY}"
echo "Cache path: ${CACHE_PATH}"
echo ""
mkdir -p artifacts/rocm-base-wheels
aws s3 cp --recursive "${CACHE_PATH}" artifacts/rocm-base-wheels/
echo ""
echo "Downloaded wheels:"
find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' -exec ls -lh {} \;
WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l)
echo ""
echo "Total: $WHEEL_COUNT wheels"
echo "========================================"
;;
key)
echo "${CACHE_KEY}"
;;
path)
echo "${CACHE_PATH}"
;;
*)
echo "Usage: $0 {check|upload|download|key|path}" >&2
echo "" >&2
echo "Commands:" >&2
echo " check - Check if cache exists, outputs 'hit' or 'miss'" >&2
echo " upload - Upload wheels from artifacts/rocm-base-wheels/ to cache" >&2
echo " download - Download wheels from cache to artifacts/rocm-base-wheels/" >&2
echo " key - Output the cache key" >&2
echo " path - Output the full S3 cache path" >&2
exit 1
;;
esac
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Check if Ray LLM can generate lock files that are compatible with this
# version of vllm. Downloads Ray's requirement files and runs a full
# dependency resolution with the installed vllm's constraints to see if
# a valid lock file can be produced.
#
# See: https://github.com/vllm-project/vllm/issues/33599
set -eo pipefail
RAY_BASE_URL="https://raw.githubusercontent.com/ray-project/ray/master/python"
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT
# Fetch all Ray requirement files used in the LLM depset pipeline
echo ">>> Fetching Ray requirement files"
RAY_FILES=(
"requirements.txt"
"requirements/cloud-requirements.txt"
"requirements/base-test-requirements.txt"
"requirements/llm/llm-requirements.txt"
"requirements/llm/llm-test-requirements.txt"
)
for FILE in "${RAY_FILES[@]}"; do
LOCAL_PATH="${WORK_DIR}/$(basename "$FILE")"
echo " ${FILE}"
curl -fsSL -o "$LOCAL_PATH" "${RAY_BASE_URL}/${FILE}"
done
# Extract installed vllm deps
echo ">>> Extracting installed vllm dependency constraints"
python3 - "${WORK_DIR}/vllm-constraints.txt" <<'PYEOF'
"""Write out the installed vllm's dependencies as pip constraint lines.
Ray uses vllm[audio], so audio-extra deps are included with their extra
markers stripped. The resolver cannot evaluate extra markers for a
package that is not itself being resolved from an index, so we activate
them manually here.
"""
import importlib.metadata
import re
import sys
out_path = sys.argv[1]
raw_reqs = importlib.metadata.requires("vllm") or []
# Ray uses vllm[audio] – activate that extra.
ACTIVE_EXTRAS = {"audio"}
EXTRA_RE = re.compile(r"""extra\s*==\s*['"]([^'"]+)['"]""")
lines = []
for r in raw_reqs:
if ";" not in r:
# Unconditional dep — always include.
lines.append(r.strip())
continue
req_part, _, marker_part = r.partition(";")
marker_part = marker_part.strip()
extra_matches = EXTRA_RE.findall(marker_part)
if not extra_matches:
# Non-extra marker (python_version, etc.) — keep as-is.
lines.append(r.strip())
continue
if not ACTIVE_EXTRAS.intersection(extra_matches):
continue # Skip inactive extras (tensorizer, bench, …).
# Strip the extra== conditions but keep any remaining markers
# (e.g. python_version).
cleaned = EXTRA_RE.sub("", marker_part)
cleaned = re.sub(r"\band\b\s*\band\b", "and", cleaned)
cleaned = re.sub(r"^\s*and\s+|\s+and\s*$", "", cleaned).strip()
if cleaned:
lines.append(f"{req_part.strip()} ; {cleaned}")
else:
lines.append(req_part.strip())
with open(out_path, "w") as f:
for line in lines:
f.write(line + "\n")
print(f"Wrote {len(lines)} constraints to {out_path}")
PYEOF
echo ">>> Installed vllm deps (first 20 lines):"
head -20 "${WORK_DIR}/vllm-constraints.txt"
# Remove Ray's vllm pin — the installed vllm's transitive deps
# (written above) replace it in the resolution. vllm itself cannot
# be resolved from PyPI for in-development versions, so we test
# whether Ray's requirements can coexist with vllm's dependency
# constraints instead.
sed -i '/^vllm/d' "${WORK_DIR}/llm-requirements.txt"
# Install uv if needed
if ! command -v uv &>/dev/null; then
echo ">>> Installing uv"
pip install uv -q
fi
# Resolve: given vllm's constraints, can Ray compile a lock file?
#
# vllm's dependency constraints are the fixed side — Ray is flexible and
# can regenerate its lock files. We pass vllm's constraints via -c so
# the resolver treats them as non-negotiable bounds, then check whether
# Ray's own requirements can still be satisfied within those bounds.
echo ""
echo "============================================================"
echo ">>> Resolving: Can Ray generate compatible lock files?"
echo "============================================================"
set +e
uv pip compile \
"${WORK_DIR}/requirements.txt" \
"${WORK_DIR}/cloud-requirements.txt" \
"${WORK_DIR}/base-test-requirements.txt" \
"${WORK_DIR}/llm-requirements.txt" \
"${WORK_DIR}/llm-test-requirements.txt" \
-c "${WORK_DIR}/vllm-constraints.txt" \
--python-version 3.12 \
--python-platform x86_64-manylinux_2_31 \
--extra-index-url https://download.pytorch.org/whl/cu129 \
--index-strategy unsafe-best-match \
--unsafe-package setuptools \
--unsafe-package ray \
--no-header \
-o "${WORK_DIR}/resolved.txt" \
2>&1
EXIT_CODE=$?
set -e
echo ""
echo "=========================================="
if [ $EXIT_CODE -eq 0 ]; then
echo "SUCCESS: Ray can generate lock files compatible with this vllm."
echo ""
echo "Key resolved versions:"
grep -E '^(protobuf|torch|numpy|transformers)==' \
"${WORK_DIR}/resolved.txt" | sort || true
echo "=========================================="
exit 0
fi
echo "FAILURE: Ray cannot generate lock files compatible with this vllm."
echo "This means a fundamental dependency conflict exists that Ray"
echo "cannot resolve by regenerating its lock files."
echo "See: https://github.com/vllm-project/vllm/issues/33599"
echo "=========================================="
# Buildkite annotation
if [ -f /usr/bin/buildkite-agent ]; then
buildkite-agent annotate --style 'warning' --context 'ray-compat' << EOF
### :warning: Ray Dependency Compatibility Warning
This PR introduces dependencies that **cannot** be resolved with Ray's requirements.
Ray would not be able to regenerate its lock files to accommodate this vllm version.
Please check the **Ray Dependency Compatibility Check** step logs for details.
See [issue #33599](https://github.com/vllm-project/vllm/issues/33599) for context.
EOF
fi
# Notify Slack if webhook is configured.
if [ -n "$RAY_COMPAT_SLACK_WEBHOOK_URL" ]; then
echo ">>> Sending Slack notification"
# Single quotes are intentional: the f-string expressions are Python, not shell.
# shellcheck disable=SC2016
PAYLOAD=$(python3 -c '
import json, os, sys
pr = os.getenv("BUILDKITE_PULL_REQUEST", "N/A")
branch = os.getenv("BUILDKITE_BRANCH", "unknown")
url = os.getenv("BUILDKITE_BUILD_URL", "#")
data = {
"text": ":warning: Ray Dependency Compatibility Check Failed",
"blocks": [{
"type": "section",
"text": {
"type": "mrkdwn",
"text": (
"*:warning: Ray Dependency Compatibility Check Failed*\n"
f"PR #{pr} on branch `{branch}` introduces dependencies "
f"that cannot be resolved with Ray'\''s requirements.\n"
f"<{url}|View Build>"
),
},
}],
}
print(json.dumps(data))
')
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$RAY_COMPAT_SLACK_WEBHOOK_URL" \
-H 'Content-type: application/json' \
-d "$PAYLOAD")
echo " Slack webhook response: $HTTP_CODE"
else
echo ">>> Skipping Slack notification (RAY_COMPAT_SLACK_WEBHOOK_URL not set)"
fi
exit 1
#!/bin/bash
#
# cherry-pick-from-milestone.sh
# Find commits from a GitHub milestone that are missing from the current branch
# and output them in chronological order for cherry-picking.
#
# Usage: ./cherry-pick-from-milestone.sh <milestone> [--dry-run] [--execute]
#
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
usage() {
cat <<EOF
Usage: $(basename "$0") <milestone> [options]
Find commits from a GitHub milestone that need to be cherry-picked into the current branch.
Arguments:
milestone The GitHub milestone name (e.g., v0.14.0)
Options:
--dry-run Show the cherry-pick commands without executing (default)
--execute Actually execute the cherry-picks
--main-branch Specify the main branch name (default: main)
--help Show this help message
Examples:
$(basename "$0") v0.14.0
$(basename "$0") v0.14.0 --dry-run
$(basename "$0") v0.14.0 --execute
$(basename "$0") v0.14.0 --main-branch master
EOF
exit 1
}
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[OK]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Default values
MILESTONE=""
DRY_RUN=true
MAIN_BRANCH="main"
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--dry-run)
DRY_RUN=true
shift
;;
--execute)
DRY_RUN=false
shift
;;
--main-branch)
MAIN_BRANCH="$2"
shift 2
;;
--help|-h)
usage
;;
-*)
log_error "Unknown option: $1"
usage
;;
*)
if [[ -z "$MILESTONE" ]]; then
MILESTONE="$1"
else
log_error "Unexpected argument: $1"
usage
fi
shift
;;
esac
done
# Validate milestone argument
if [[ -z "$MILESTONE" ]]; then
log_error "Milestone is required"
usage
fi
# Check if we're in a git repository
if ! git rev-parse --is-inside-work-tree &>/dev/null; then
log_error "Not in a git repository"
exit 1
fi
# Check if gh CLI is available
if ! command -v gh &>/dev/null; then
log_error "GitHub CLI (gh) is not installed"
exit 1
fi
# Check if authenticated with gh
if ! gh auth status &>/dev/null; then
log_error "Not authenticated with GitHub CLI. Run 'gh auth login' first."
exit 1
fi
CURRENT_BRANCH=$(git branch --show-current)
log_info "Current branch: ${CURRENT_BRANCH}"
log_info "Main branch: ${MAIN_BRANCH}"
log_info "Milestone: ${MILESTONE}"
echo ""
# Fetch latest from remote
log_info "Fetching latest from remote..."
git fetch origin "$MAIN_BRANCH" --quiet
# Get merged PRs from the milestone, sorted by merge date
log_info "Fetching merged PRs from milestone '${MILESTONE}'..."
# Store PR data in a temp file
PR_DATA=$(mktemp)
trap 'rm -f "$PR_DATA"' EXIT
if ! gh pr list --state merged --search "milestone:${MILESTONE}" \
--limit 1000 \
--json number,title,mergeCommit,mergedAt \
--jq 'sort_by(.mergedAt) | .[] | "\(.mergeCommit.oid)\t\(.number)\t\(.title)"' > "$PR_DATA" 2>/dev/null; then
log_error "Failed to fetch PRs from milestone '${MILESTONE}'"
log_error "This could be due to:"
log_error " - Milestone does not exist"
log_error " - Network/authentication issues"
log_error " - Invalid milestone name format"
exit 1
fi
if [[ ! -s "$PR_DATA" ]]; then
log_warn "No merged PRs found for milestone '${MILESTONE}'"
exit 0
fi
TOTAL_PRS=$(wc -l < "$PR_DATA")
log_info "Found ${TOTAL_PRS} merged PR(s) in milestone"
echo ""
# Find commits that are missing from current branch
MISSING_COMMITS=()
MISSING_INFO=()
while IFS=$'\t' read -r sha pr_number title; do
# Skip if SHA is empty or null
if [[ -z "$sha" || "$sha" == "null" ]]; then
log_warn "PR #${pr_number} has no merge commit SHA, skipping"
continue
fi
# Check if this commit is already in the current branch
if git merge-base --is-ancestor "$sha" HEAD 2>/dev/null; then
log_success "PR #${pr_number} already in branch: ${title:0:60}"
else
log_warn "PR #${pr_number} MISSING: ${title:0:60}"
MISSING_COMMITS+=("$sha")
MISSING_INFO+=("$sha PR #${pr_number}: ${title}")
fi
done < "$PR_DATA"
echo ""
if [[ ${#MISSING_COMMITS[@]} -eq 0 ]]; then
log_success "All PRs from milestone '${MILESTONE}' are already in the current branch!"
exit 0
fi
log_info "Found ${#MISSING_COMMITS[@]} missing commit(s) to cherry-pick"
echo ""
# Output the cherry-pick commands
echo "=========================================="
echo "Cherry-pick commands (in chronological order):"
echo "=========================================="
echo ""
for info in "${MISSING_INFO[@]}"; do
echo "# $info"
done
echo ""
echo "# Run these commands to cherry-pick all missing commits:"
echo "git cherry-pick ${MISSING_COMMITS[*]}"
echo ""
# Or one by one
echo "# Or cherry-pick one at a time:"
for sha in "${MISSING_COMMITS[@]}"; do
echo "git cherry-pick $sha"
done
echo ""
# Execute if requested
if [[ "$DRY_RUN" == false ]]; then
echo "=========================================="
log_info "Executing cherry-picks..."
echo "=========================================="
for i in "${!MISSING_COMMITS[@]}"; do
sha="${MISSING_COMMITS[$i]}"
info="${MISSING_INFO[$i]}"
echo ""
log_info "Cherry-picking: $info"
if git cherry-pick "$sha"; then
log_success "Successfully cherry-picked $sha"
else
log_error "Failed to cherry-pick $sha"
log_error "Resolve conflicts and run 'git cherry-pick --continue', or 'git cherry-pick --abort' to cancel"
exit 1
fi
done
echo ""
log_success "All cherry-picks completed successfully!"
else
echo "=========================================="
echo -e "${YELLOW}Dry run mode - no changes made${NC}"
echo "Run with --execute to perform the cherry-picks"
echo "=========================================="
fi
#!/bin/bash
# Usage: ./ci_clean_log.sh ci.log
# This script strips timestamps and color codes from CI log files.
# Check if argument is given
if [ $# -lt 1 ]; then
echo "Usage: $0 ci.log"
exit 1
fi
INPUT_FILE="$1"
# Strip timestamps
sed -i 's/^\[[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}Z\] //' "$INPUT_FILE"
# Strip colorization
sed -i -r 's/\x1B\[[0-9;]*[mK]//g' "$INPUT_FILE"
#!/bin/bash
set -ex
# Clean up old nightly builds from DockerHub, keeping only the last 14 builds
# This script uses DockerHub API to list and delete old tags with specified prefix
# Usage: cleanup-nightly-builds.sh [TAG_PREFIX]
# Example: cleanup-nightly-builds.sh "nightly-" or cleanup-nightly-builds.sh "cu130-nightly-"
# Get tag prefix from argument, default to "nightly-" if not provided
TAG_PREFIX="${1:-nightly-}"
echo "Cleaning up tags with prefix: $TAG_PREFIX"
# DockerHub API endpoint for vllm/vllm-openai repository
REPO_API_URL="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags"
# Get DockerHub credentials from environment
if [ -z "$DOCKERHUB_TOKEN" ]; then
echo "Error: DOCKERHUB_TOKEN environment variable is not set"
exit 1
fi
if [ -z "$DOCKERHUB_USERNAME" ]; then
echo "Error: DOCKERHUB_USERNAME environment variable is not set"
exit 1
fi
# Get DockerHub bearer token
echo "Getting DockerHub bearer token..."
set +x
BEARER_TOKEN=$(curl -s -X POST \
-H "Content-Type: application/json" \
-d "{\"username\": \"$DOCKERHUB_USERNAME\", \"password\": \"$DOCKERHUB_TOKEN\"}" \
"https://hub.docker.com/v2/users/login" | jq -r '.token')
set -x
if [ -z "$BEARER_TOKEN" ] || [ "$BEARER_TOKEN" = "null" ]; then
echo "Error: Failed to get DockerHub bearer token"
exit 1
fi
# Function to get all tags from DockerHub
get_all_tags() {
local page=1
local all_tags=""
while true; do
set +x
local response=$(curl -s -H "Authorization: Bearer $BEARER_TOKEN" \
"$REPO_API_URL?page=$page&page_size=100")
set -x
# Get both last_updated timestamp and tag name, separated by |
local tags=$(echo "$response" | jq -r --arg prefix "$TAG_PREFIX" '.results[] | select(.name | startswith($prefix)) | "\(.last_updated)|\(.name)"')
if [ -z "$tags" ]; then
break
fi
all_tags="$all_tags$tags"$'\n'
page=$((page + 1))
done
# Sort by timestamp (newest first) and extract just the tag names
echo "$all_tags" | sort -r | cut -d'|' -f2
}
delete_tag() {
local tag_name="$1"
echo "Deleting tag: $tag_name"
local delete_url="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags/$tag_name"
set +x
local response=$(curl -s -X DELETE -H "Authorization: Bearer $BEARER_TOKEN" "$delete_url")
set -x
if echo "$response" | jq -e '.detail' > /dev/null 2>&1; then
echo "Warning: Failed to delete tag $tag_name: $(echo "$response" | jq -r '.detail')"
else
echo "Successfully deleted tag: $tag_name"
fi
}
# Get all nightly- prefixed tags, sorted by last_updated timestamp (newest first)
echo "Fetching all tags from DockerHub..."
all_tags=$(get_all_tags)
if [ -z "$all_tags" ]; then
echo "No tags found to clean up"
exit 0
fi
# Count total tags
total_tags=$(echo "$all_tags" | wc -l)
echo "Found $total_tags tags"
# Keep only the last 14 builds (including the current one)
tags_to_keep=14
tags_to_delete=$((total_tags - tags_to_keep))
if [ $tags_to_delete -le 0 ]; then
echo "No tags need to be deleted (only $total_tags tags found, keeping $tags_to_keep)"
exit 0
fi
echo "Will delete $tags_to_delete old tags, keeping the newest $tags_to_keep"
# Get tags to delete (skip the first $tags_to_keep tags)
tags_to_delete_list=$(echo "$all_tags" | tail -n +$((tags_to_keep + 1)))
if [ -z "$tags_to_delete_list" ]; then
echo "No tags to delete"
exit 0
fi
# Delete old tags
echo "Deleting old tags..."
while IFS= read -r tag; do
if [ -n "$tag" ]; then
delete_tag "$tag"
# Add a small delay to avoid rate limiting
sleep 1
fi
done <<< "$tags_to_delete_list"
echo "Cleanup completed successfully"
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# do not complain about line length (for docstring)
# ruff: noqa: E501
import argparse
import json
import sys
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
from urllib.parse import quote
import regex as re
def normalize_package_name(name: str) -> str:
"""
Normalize package name according to PEP 503.
https://peps.python.org/pep-0503/#normalized-names
Replace runs of underscores, hyphens, and periods with a single hyphen,
and lowercase the result.
"""
return re.sub(r"[-_.]+", "-", name).lower()
if not sys.version_info >= (3, 12):
raise RuntimeError("This script requires Python 3.12 or higher.")
INDEX_HTML_TEMPLATE = """<!DOCTYPE html>
<html>
<!-- {comment} -->
<meta name="pypi:repository-version" content="1.0">
<body>
{items}
</body>
</html>
"""
@dataclass
class WheelFileInfo:
package_name: str
version: str
build_tag: str | None
python_tag: str
abi_tag: str
platform_tag: str
variant: str | None
filename: str
def parse_from_filename(file: str) -> WheelFileInfo:
"""
Parse wheel file name to extract metadata.
The format of wheel names:
{package_name}-{version}(-{build_tag})?-{python_tag}-{abi_tag}-{platform_tag}.whl
All versions could contain a variant like '+cu129' or '.cpu' or `.rocm` (or not).
Example:
vllm-0.11.0-cp38-abi3-manylinux1_x86_64.whl
vllm-0.10.2rc2+cu129-cp38-abi3-manylinux2014_aarch64.whl
vllm-0.11.1rc8.dev14+gaa384b3c0-cp38-abi3-manylinux2014_aarch64.whl
vllm-0.11.1rc8.dev14+gaa384b3c0.cu130-cp38-abi3-manylinux1_x86_64.whl
"""
wheel_file_re = re.compile(
r"^(?P<package_name>.+)-(?P<version>[^-]+?)(-(?P<build_tag>[^-]+))?-(?P<python_tag>[^-]+)-(?P<abi_tag>[^-]+)-(?P<platform_tag>[^-]+)\.whl$"
)
match = wheel_file_re.match(file)
if not match:
raise ValueError(f"Invalid wheel file name: {file}")
package_name = match.group("package_name")
version = match.group("version")
build_tag = match.group("build_tag")
python_tag = match.group("python_tag")
abi_tag = match.group("abi_tag")
platform_tag = match.group("platform_tag")
# extract variant from version
variant = None
if "dev" in version:
ver_after_dev = version.split("dev")[-1]
if "." in ver_after_dev:
variant = ver_after_dev.split(".")[-1]
version = version.removesuffix("." + variant)
else:
if "+" in version:
version_part, suffix = version.split("+", 1)
# Only treat known patterns as variants (rocmXXX, cuXXX, cpu)
# Git hashes and other suffixes are NOT variants
if suffix.startswith(("rocm", "cu", "cpu")):
variant = suffix
version = version_part
# Otherwise keep the full version string (variant stays None)
return WheelFileInfo(
package_name=package_name,
version=version,
build_tag=build_tag,
python_tag=python_tag,
abi_tag=abi_tag,
platform_tag=platform_tag,
variant=variant,
filename=file,
)
def generate_project_list(subdir_names: list[str], comment: str = "") -> str:
"""
Generate project list HTML content linking to each project & variant subdirectory.
"""
href_tags = []
for name in sorted(subdir_names):
name = name.strip("/").strip(".")
href_tags.append(f' <a href="{name}/">{name}/</a><br/>')
return INDEX_HTML_TEMPLATE.format(items="\n".join(href_tags), comment=comment)
def generate_package_index_and_metadata(
wheel_files: list[WheelFileInfo],
wheel_base_dir: Path,
index_base_dir: Path,
comment: str = "",
) -> tuple[str, str]:
"""
Generate package index HTML content for a specific package, linking to actual wheel files.
"""
href_tags = []
metadata = []
for file in sorted(wheel_files, key=lambda x: x.filename):
relative_path = (
wheel_base_dir.relative_to(index_base_dir, walk_up=True) / file.filename
)
# handle with '+' in URL, and avoid double-encoding '/' and already-encoded '%2B'
# NOTE: this is AWS S3 specific behavior!
file_path_quoted = quote(relative_path.as_posix(), safe=":%/")
href_tags.append(f' <a href="{file_path_quoted}">{file.filename}</a><br/>')
file_meta = asdict(file)
file_meta["path"] = file_path_quoted
metadata.append(file_meta)
index_str = INDEX_HTML_TEMPLATE.format(items="\n".join(href_tags), comment=comment)
metadata_str = json.dumps(metadata, indent=2)
return index_str, metadata_str
def generate_index_and_metadata(
whl_files: list[str],
wheel_base_dir: Path,
index_base_dir: Path,
default_variant: str | None = None,
alias_to_default: str | None = None,
comment: str = "",
):
"""
Generate index for all wheel files.
Args:
whl_files (list[str]): List of wheel files (must be directly under `wheel_base_dir`).
wheel_base_dir (Path): Base directory for wheel files.
index_base_dir (Path): Base directory to store index files.
default_variant (str | None): The default variant name, if any.
alias_to_default (str | None): Alias variant name for the default variant, if any.
comment (str | None): Optional comment to include in the generated HTML files.
First, parse all wheel files to extract metadata.
We need to collect all wheel files for each variant, and generate an index for it (in a subdirectory).
The index for the default variant (if any) is generated in the root index directory.
If `default_variant` is provided, all wheels must have variant suffixes, and the default variant index
is purely a copy of the corresponding variant index, with only the links adjusted.
Otherwise, all wheels without variant suffixes are treated as the default variant.
If `alias_to_default` is provided, an additional alias subdirectory is created, it has the same content
as the default variant index, but the links are adjusted accordingly.
Index directory structure:
index_base_dir/ (hosted at wheels.vllm.ai/{nightly,$commit,$version}/)
index.html # project list, linking to "vllm/" and other packages, and all variant subdirectories
vllm/
index.html # package index, pointing to actual files in wheel_base_dir (relative path)
metadata.json # machine-readable metadata for all wheels in this package
cpu/ # cpu variant subdirectory
index.html
vllm/
index.html
metadata.json
cu129/ # cu129 is actually the alias to default variant
index.html
vllm/
index.html
metadata.json
cu130/ # cu130 variant subdirectory
index.html
vllm/
index.html
metadata.json
...
metadata.json stores a dump of all wheel files' metadata in a machine-readable format:
[
{
"package_name": "vllm",
"version": "0.10.2rc2",
"build_tag": null,
"python_tag": "cp38",
"abi_tag": "abi3",
"platform_tag": "manylinux2014_aarch64",
"variant": "cu129",
"filename": "vllm-0.10.2rc2+cu129-cp38-abi3-manylinux2014_aarch64.whl",
"path": "../vllm-0.10.2rc2%2Bcu129-cp38-abi3-manylinux2014_aarch64.whl" # to be concatenated with the directory URL and URL-encoded
},
...
]
"""
parsed_files = [parse_from_filename(f) for f in whl_files]
if not parsed_files:
print("No wheel files found, skipping index generation.")
return
# For ROCm builds: inherit variant from vllm wheel
# All ROCm wheels should share the same variant as vllm
rocm_variant = None
for file in parsed_files:
if (
file.package_name == "vllm"
and file.variant
and file.variant.startswith("rocm")
):
rocm_variant = file.variant
print(f"Detected ROCm variant from vllm: {rocm_variant}")
break
# Apply ROCm variant to all wheels without a variant
if rocm_variant:
for file in parsed_files:
if file.variant is None:
file.variant = rocm_variant
print(f"Inherited variant '{rocm_variant}' for {file.filename}")
# Group by variant
variant_to_files: dict[str, list[WheelFileInfo]] = {}
for file in parsed_files:
variant = file.variant or "default"
if variant not in variant_to_files:
variant_to_files[variant] = []
variant_to_files[variant].append(file)
print(f"Found variants: {list(variant_to_files.keys())}")
# sanity check for default variant
if default_variant:
if "default" in variant_to_files:
raise ValueError(
"All wheel files must have variant suffixes when `default_variant` is specified."
)
if default_variant not in variant_to_files:
raise ValueError(
f"Default variant '{default_variant}' not found among wheel files."
)
if alias_to_default:
if "default" not in variant_to_files:
# e.g. only some wheels are uploaded to S3 currently
print(
"[WARN] Alias to default variant specified, but no default variant found."
)
elif alias_to_default in variant_to_files:
raise ValueError(
f"Alias variant name '{alias_to_default}' already exists among wheel files."
)
else:
variant_to_files[alias_to_default] = variant_to_files["default"].copy()
print(f"Alias variant '{alias_to_default}' created for default variant.")
# Generate comment in HTML header
comment_str = f" ({comment})" if comment else ""
comment_tmpl = f"Generated on {datetime.now().isoformat()}{comment_str}"
# Generate index for each variant
subdir_names = set()
for variant, files in variant_to_files.items():
if variant == "default":
variant_dir = index_base_dir
else:
variant_dir = index_base_dir / variant
subdir_names.add(variant)
variant_dir.mkdir(parents=True, exist_ok=True)
# gather all package names in this variant (normalized per PEP 503)
packages = set(normalize_package_name(f.package_name) for f in files)
if variant == "default":
# these packages should also appear in the "project list"
# generate after all variants are processed
subdir_names = subdir_names.union(packages)
else:
# generate project list for this variant directly
project_list_str = generate_project_list(sorted(packages), comment_tmpl)
with open(variant_dir / "index.html", "w") as f:
f.write(project_list_str)
for package in packages:
# filter files belonging to this package only (compare normalized names)
package_files = [
f for f in files if normalize_package_name(f.package_name) == package
]
package_dir = variant_dir / package
package_dir.mkdir(parents=True, exist_ok=True)
index_str, metadata_str = generate_package_index_and_metadata(
package_files, wheel_base_dir, package_dir, comment
)
with open(package_dir / "index.html", "w") as f:
f.write(index_str)
with open(package_dir / "metadata.json", "w") as f:
f.write(metadata_str)
# Generate top-level project list index
project_list_str = generate_project_list(sorted(subdir_names), comment_tmpl)
with open(index_base_dir / "index.html", "w") as f:
f.write(project_list_str)
if __name__ == "__main__":
"""
Arguments:
--version <version> : version string for the current build (e.g., commit hash)
--wheel-dir <wheel_directory> : directory containing wheel files (default to be same as `version`)
--current-objects <path_to_json> : path to JSON file containing current S3 objects listing in this version directory
--output-dir <output_directory> : directory to store generated index files
--alias-to-default <alias_variant_name> : (optional) alias variant name for the default variant
--comment <comment_string> : (optional) comment string to include in generated HTML files
"""
parser = argparse.ArgumentParser(
description="Process nightly build wheel files to generate indices."
)
parser.add_argument(
"--version",
type=str,
required=True,
help="Version string for the current build (e.g., commit hash)",
)
parser.add_argument(
"--current-objects",
type=str,
required=True,
help="Path to JSON file containing current S3 objects listing in this version directory",
)
parser.add_argument(
"--output-dir",
type=str,
required=True,
help="Directory to store generated index files",
)
parser.add_argument(
"--wheel-dir",
type=str,
default=None,
help="Directory containing wheel files (default to be same as `version`)",
)
parser.add_argument(
"--alias-to-default",
type=str,
default=None,
help="Alias variant name for the default variant",
)
parser.add_argument(
"--comment",
type=str,
default="",
help="Optional comment string to include in generated HTML files",
)
args = parser.parse_args()
version = args.version
# Allow rocm/ prefix, reject other slashes and all backslashes
if "\\" in version:
raise ValueError("Version string must not contain backslashes.")
if "/" in version and not version.startswith("rocm/"):
raise ValueError(
"Version string must not contain slashes (except for 'rocm/' prefix)."
)
current_objects_path = Path(args.current_objects)
output_dir = Path(args.output_dir)
if not output_dir.exists():
output_dir.mkdir(parents=True, exist_ok=True)
# Read current objects JSON
with open(current_objects_path) as f:
current_objects: dict[str, list[dict[str, Any]]] = json.load(f)
# current_objects looks like from list_objects_v2 S3 API:
"""
"Contents": [
{
"Key": "e2f56c309d2a28899c68975a7e104502d56deb8f/vllm-0.11.2.dev363+ge2f56c309-cp38-abi3-manylinux1_x86_64.whl",
"LastModified": "2025-11-28T14:00:32+00:00",
"ETag": "\"37a38339c7cdb61ca737021b968075df-52\"",
"ChecksumAlgorithm": [
"CRC64NVME"
],
"ChecksumType": "FULL_OBJECT",
"Size": 435649349,
"StorageClass": "STANDARD"
},
...
]
"""
# Extract wheel file keys
wheel_files = []
for item in current_objects.get("Contents", []):
key: str = item["Key"]
if key.endswith(".whl"):
wheel_files.append(key.split("/")[-1]) # only the filename is used
print(f"Found {len(wheel_files)} wheel files for version {version}: {wheel_files}")
# keep only "official" files for a non-nightly version (specified by cli args)
PY_VERSION_RE = re.compile(r"^\d+\.\d+\.\d+([a-zA-Z0-9.+-]*)?$")
if PY_VERSION_RE.match(version):
# upload-wheels.sh ensures no "dev" is in args.version
wheel_files = list(
filter(lambda x: version in x and "dev" not in x, wheel_files)
)
print(f"Non-nightly version detected, wheel files used: {wheel_files}")
else:
print("Nightly version detected, keeping all wheel files.")
# Generate index and metadata, assuming wheels and indices are stored as:
# s3://vllm-wheels/{wheel_dir}/<wheel files>
# s3://vllm-wheels/<anything>/<index files>
#
# For ROCm builds, version is "rocm/{commit}" and indices are uploaded to:
# - rocm/{commit}/ (same as wheels)
# - rocm/nightly/
# - rocm/{version}/
# All these are under the "rocm/" prefix, so relative paths should be
# relative to "rocm/", not the bucket root.
if args.wheel_dir:
# Explicit wheel-dir provided (e.g., for version-specific indices pointing to commit dir)
wheel_dir = args.wheel_dir.strip().rstrip("/")
elif version.startswith("rocm/"):
# For rocm/commit, wheel_base_dir should be just the commit part
# so relative path from rocm/0.12.0/rocm710/vllm/ -> ../../../{commit}/
wheel_dir = version.split("/", 1)[1]
else:
wheel_dir = version
wheel_base_dir = Path(output_dir).parent / wheel_dir
index_base_dir = Path(output_dir)
generate_index_and_metadata(
whl_files=wheel_files,
wheel_base_dir=wheel_base_dir,
index_base_dir=index_base_dir,
default_variant=None,
alias_to_default=args.alias_to_default,
comment=args.comment.strip(),
)
print(f"Successfully generated index and metadata in {output_dir}")
#!/bin/bash
# This script runs tests inside the corresponding ROCm docker container.
# It handles both single-node and multi-node test configurations.
#
# Multi-node detection: Instead of matching on fragile group names, we detect
# multi-node jobs structurally by looking for the bracket command syntax
# "[node0_cmds] && [node1_cmds]" or via the NUM_NODES environment variable.
#
###############################################################################
# QUOTING / COMMAND PASSING
#
# Passing commands as positional arguments ($*) is fragile when the command
# string itself contains double quotes, e.g.:
#
# bash run-amd-test.sh "export FLAGS="value" && pytest -m "not slow""
#
# The outer shell resolves the nested quotes *before* this script runs, so
# the script receives mangled input it cannot fully recover.
#
# Preferred: pass commands via the VLLM_TEST_COMMANDS environment variable:
#
# export VLLM_TEST_COMMANDS='export FLAGS="value" && pytest -m "not slow"'
# bash run-amd-test.sh
#
# Single-quoted assignment preserves all inner double quotes verbatim.
# The $* path is kept for backward compatibility but callers should migrate.
###############################################################################
set -o pipefail
# Export Python path
export PYTHONPATH=".."
###############################################################################
# Helper Functions
###############################################################################
wait_for_clean_gpus() {
local timeout=${1:-300}
local start=$SECONDS
echo "--- Waiting for clean GPU state (timeout: ${timeout}s)"
while true; do
if grep -q clean /opt/amdgpu/etc/gpu_state; then
echo "GPUs state is \"clean\""
return
fi
if (( SECONDS - start >= timeout )); then
echo "Error: GPUs did not reach clean state within ${timeout}s" >&2
exit 1
fi
sleep 3
done
}
cleanup_docker() {
# Get Docker's root directory
docker_root=$(docker info -f '{{.DockerRootDir}}')
if [ -z "$docker_root" ]; then
echo "Failed to determine Docker root directory."
exit 1
fi
echo "Docker root directory: $docker_root"
disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
threshold=70
if [ "$disk_usage" -gt "$threshold" ]; then
echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
docker image prune -f
docker volume prune -f && docker system prune --force --filter "until=72h" --all
echo "Docker images and volumes cleanup completed."
else
echo "Disk usage is below $threshold%. No cleanup needed."
fi
}
cleanup_network() {
local max_nodes=${NUM_NODES:-2}
for node in $(seq 0 $((max_nodes - 1))); do
if docker ps -a -q -f name="node${node}" | grep -q .; then
docker stop "node${node}" || true
fi
done
if docker network ls | grep -q docker-net; then
docker network rm docker-net || true
fi
}
is_multi_node() {
local cmds="$1"
# Primary signal: NUM_NODES environment variable set by the pipeline
if [[ "${NUM_NODES:-1}" -gt 1 ]]; then
return 0
fi
# Fallback: detect the bracket syntax structurally
# Pattern: [...] && [...] (per-node command arrays)
if [[ "$cmds" =~ \[.*\].*\&\&.*\[.*\] ]]; then
return 0
fi
return 1
}
handle_pytest_exit() {
local exit_code=$1
if [ "$exit_code" -eq 5 ]; then
echo "Pytest exit code 5 (no tests collected) - treating as success."
exit 0
fi
exit "$exit_code"
}
###############################################################################
# Pytest marker/keyword re-quoting
#
# When commands are passed through Buildkite -> shell -> $* -> bash -c,
# quotes around multi-word pytest -m/-k expressions get stripped:
# pytest -v -s -m 'not cpu_test' v1/core
# becomes:
# pytest -v -s -m not cpu_test v1/core
#
# pytest then interprets "cpu_test" as a file path, not part of the marker.
#
# This function detects unquoted expressions after -m/-k and re-quotes them
# by collecting tokens until a recognizable boundary is reached:
# - test path (contains '/')
# - test file (ends with '.py')
# - another pytest flag (--xxx or -x single-char flags)
# - command separator (&& || ; |)
# - environment variable assignment (FOO=bar)
#
# Single-word markers (e.g. -m cpu_test, -m hybrid_model) pass through
# unquoted since they have no spaces and work fine.
#
# Already-quoted expressions (containing literal single quotes) are passed
# through untouched to avoid double-quoting values injected by
# apply_rocm_test_overrides.
#
# NOTE: This ONLY fixes -m/-k flags. It cannot recover arbitrary inner
# double-quotes stripped by the calling shell (see header comment).
# Use VLLM_TEST_COMMANDS to avoid the problem entirely.
###############################################################################
re_quote_pytest_markers() {
local input="$1"
local output=""
local collecting=false
local marker_buf=""
# Strip backslash-newline continuations, then flatten remaining newlines
local flat="${input//$'\\\n'/ }"
flat="${flat//$'\n'/ }"
# Disable globbing to prevent *.py etc. from expanding during read -ra
local restore_glob
restore_glob="$(shopt -p -o noglob 2>/dev/null || true)"
set -o noglob
local -a words
read -ra words <<< "$flat"
eval "$restore_glob"
for word in "${words[@]}"; do
if $collecting; then
# If the token we're about to collect already contains a literal
# single quote, the expression was already quoted upstream.
# Flush and stop collecting.
if [[ "$word" == *"'"* ]]; then
if [[ -n "$marker_buf" ]]; then
# Should not normally happen (partial buf + quote), flush raw
output+="${marker_buf} "
marker_buf=""
fi
output+="${word} "
collecting=false
continue
fi
local is_boundary=false
case "$word" in
# Line-continuation artifact
"\\")
is_boundary=true ;;
# Command separators
"&&"|"||"|";"|"|")
is_boundary=true ;;
# Long flags (--ignore, --shard-id, etc.)
--*)
is_boundary=true ;;
# Short flags (-v, -s, -x, etc.) but NOT negative marker tokens
# like "not" which don't start with "-". Also skip -k/-m which
# would start a new marker (handled below).
-[a-zA-Z])
is_boundary=true ;;
# Test path (contains /)
*/*)
is_boundary=true ;;
# Test file (ends with .py, possibly with ::method)
*.py|*.py::*)
is_boundary=true ;;
# Environment variable assignment preceding a command (FOO=bar)
*=*)
# Only treat as boundary if it looks like VAR=value, not
# pytest filter expressions like num_gpus=2 inside markers
if [[ "$word" =~ ^[A-Z_][A-Z0-9_]*= ]]; then
is_boundary=true
fi
;;
esac
if $is_boundary; then
# Flush the collected marker expression
if [[ "$marker_buf" == *" "* || "$marker_buf" == *"("* ]]; then
output+="'${marker_buf}' "
else
output+="${marker_buf} "
fi
collecting=false
marker_buf=""
# Check if this boundary word itself starts a new -m/-k
if [[ "$word" == "-m" || "$word" == "-k" ]]; then
output+="${word} "
collecting=true
# Drop stray backslash tokens silently
elif [[ "$word" == "\\" ]]; then
:
else
output+="${word} "
fi
else
# Accumulate into marker buffer
if [[ -n "$marker_buf" ]]; then
marker_buf+=" ${word}"
else
marker_buf="${word}"
fi
fi
elif [[ "$word" == "-m" || "$word" == "-k" ]]; then
output+="${word} "
collecting=true
marker_buf=""
else
output+="${word} "
fi
done
# Flush any trailing marker expression (marker at end of command)
if $collecting && [[ -n "$marker_buf" ]]; then
if [[ "$marker_buf" == *" "* || "$marker_buf" == *"("* ]]; then
output+="'${marker_buf}'"
else
output+="${marker_buf}"
fi
fi
echo "${output% }"
}
###############################################################################
# ROCm-specific pytest command rewrites
#
# These apply ignore flags and environment overrides for tests that are not
# yet supported or behave differently on ROCm hardware. Kept as a single
# function so new exclusions are easy to add in one place.
###############################################################################
apply_rocm_test_overrides() {
local cmds="$1"
# --- Model registry filter ---
if [[ $cmds == *"pytest -v -s models/test_registry.py"* ]]; then
cmds=${cmds//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"}
fi
# --- LoRA: disable custom paged attention ---
if [[ $cmds == *"pytest -v -s lora"* ]]; then
cmds=${cmds//"pytest -v -s lora"/"VLLM_ROCM_CUSTOM_PAGED_ATTN=0 pytest -v -s lora"}
fi
# --- Kernel ignores ---
if [[ $cmds == *" kernels/core"* ]]; then
cmds="${cmds} \
--ignore=kernels/core/test_fused_quant_layernorm.py \
--ignore=kernels/core/test_permute_cols.py"
fi
if [[ $cmds == *" kernels/attention"* ]]; then
cmds="${cmds} \
--ignore=kernels/attention/test_attention_selector.py \
--ignore=kernels/attention/test_encoder_decoder_attn.py \
--ignore=kernels/attention/test_flash_attn.py \
--ignore=kernels/attention/test_flashinfer.py \
--ignore=kernels/attention/test_prefix_prefill.py \
--ignore=kernels/attention/test_cascade_flash_attn.py \
--ignore=kernels/attention/test_mha_attn.py \
--ignore=kernels/attention/test_lightning_attn.py \
--ignore=kernels/attention/test_attention.py"
fi
if [[ $cmds == *" kernels/quantization"* ]]; then
cmds="${cmds} \
--ignore=kernels/quantization/test_int8_quant.py \
--ignore=kernels/quantization/test_machete_mm.py \
--ignore=kernels/quantization/test_block_fp8.py \
--ignore=kernels/quantization/test_block_int8.py \
--ignore=kernels/quantization/test_marlin_gemm.py \
--ignore=kernels/quantization/test_cutlass_scaled_mm.py \
--ignore=kernels/quantization/test_int8_kernel.py"
fi
if [[ $cmds == *" kernels/mamba"* ]]; then
cmds="${cmds} \
--ignore=kernels/mamba/test_mamba_mixer2.py \
--ignore=kernels/mamba/test_causal_conv1d.py \
--ignore=kernels/mamba/test_mamba_ssm_ssd.py"
fi
if [[ $cmds == *" kernels/moe"* ]]; then
cmds="${cmds} \
--ignore=kernels/moe/test_moe.py \
--ignore=kernels/moe/test_cutlass_moe.py \
--ignore=kernels/moe/test_triton_moe_ptpc_fp8.py"
fi
# --- Entrypoint ignores ---
if [[ $cmds == *" entrypoints/openai "* ]]; then
cmds=${cmds//" entrypoints/openai "/" entrypoints/openai \
--ignore=entrypoints/openai/test_audio.py \
--ignore=entrypoints/openai/test_shutdown.py \
--ignore=entrypoints/openai/test_completion.py \
--ignore=entrypoints/openai/test_models.py \
--ignore=entrypoints/openai/test_lora_adapters.py \
--ignore=entrypoints/openai/test_return_tokens_as_ids.py \
--ignore=entrypoints/openai/test_root_path.py \
--ignore=entrypoints/openai/test_tokenization.py \
--ignore=entrypoints/openai/test_prompt_validation.py "}
fi
if [[ $cmds == *" entrypoints/llm "* ]]; then
cmds=${cmds//" entrypoints/llm "/" entrypoints/llm \
--ignore=entrypoints/llm/test_chat.py \
--ignore=entrypoints/llm/test_accuracy.py \
--ignore=entrypoints/llm/test_init.py \
--ignore=entrypoints/llm/test_prompt_validation.py "}
fi
# Clean up escaped newlines from --ignore appends
cmds=$(echo "$cmds" | sed 's/ \\ / /g')
echo "$cmds"
}
###############################################################################
# Main
###############################################################################
# --- GPU initialization ---
echo "--- Confirming Clean Initial State"
wait_for_clean_gpus
echo "--- ROCm info"
rocminfo
# --- Docker housekeeping ---
cleanup_docker
echo "--- Resetting GPUs"
echo "reset" > /opt/amdgpu/etc/gpu_state
wait_for_clean_gpus
# --- Pull test image ---
echo "--- Pulling container"
image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}"
container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
docker pull "${image_name}"
remove_docker_container() {
docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true
}
trap remove_docker_container EXIT
# --- Prepare commands ---
echo "--- Running container"
HF_CACHE="$(realpath ~)/huggingface"
mkdir -p "${HF_CACHE}"
HF_MOUNT="/root/.cache/huggingface"
# ---- Command source selection ----
# Prefer VLLM_TEST_COMMANDS (preserves all inner quoting intact).
# Fall back to $* for backward compatibility, but warn that inner
# double-quotes will have been stripped by the calling shell.
if [[ -n "${VLLM_TEST_COMMANDS:-}" ]]; then
commands="${VLLM_TEST_COMMANDS}"
echo "Commands sourced from VLLM_TEST_COMMANDS (quoting preserved)"
else
commands="$*"
if [[ -z "$commands" ]]; then
echo "Error: No test commands provided." >&2
echo "Usage:" >&2
echo " Preferred: VLLM_TEST_COMMANDS='...' bash $0" >&2
echo " Legacy: bash $0 \"commands here\"" >&2
exit 1
fi
echo "Commands sourced from positional args (legacy mode)"
echo "WARNING: Inner double-quotes in the command string may have been"
echo " stripped by the calling shell. If you see syntax errors, switch to:"
echo " export VLLM_TEST_COMMANDS='your commands here'"
echo " bash $0"
fi
echo "Raw commands: $commands"
# Fix quoting before ROCm overrides (so overrides see correct structure)
commands=$(re_quote_pytest_markers "$commands")
echo "After re-quoting: $commands"
commands=$(apply_rocm_test_overrides "$commands")
echo "Final commands: $commands"
MYPYTHONPATH=".."
# Verify GPU access
render_gid=$(getent group render | cut -d: -f3)
if [[ -z "$render_gid" ]]; then
echo "Error: 'render' group not found. This is required for GPU access." >&2
exit 1
fi
# --- RDMA device passthrough (conditional) ---
# If the host has RDMA devices, pass them through so tests like
# test_moriio_connector can access ibverbs. On hosts without RDMA
# hardware the tests will gracefully skip via _rdma_available().
RDMA_FLAGS=""
if [ -d /dev/infiniband ]; then
echo "RDMA devices detected on host, enabling passthrough"
RDMA_FLAGS="--device /dev/infiniband --cap-add=IPC_LOCK"
else
echo "No RDMA devices found on host, RDMA tests will be skipped"
fi
# --- Route: multi-node vs single-node ---
if is_multi_node "$commands"; then
echo "--- Multi-node job detected"
export DCKR_VER=$(docker --version | sed 's/Docker version \(.*\), build .*/\1/')
# Parse the bracket syntax: prefix ; [node0_cmds] && [node1_cmds]
# BASH_REMATCH[1] = prefix (everything before first bracket)
# BASH_REMATCH[2] = comma-separated node0 commands
# BASH_REMATCH[3] = comma-separated node1 commands
if [[ "$commands" =~ ^(.*)\[(.*)"] && ["(.*)\]$ ]]; then
prefix=$(echo "${BASH_REMATCH[1]}" | sed 's/;//g')
echo "PREFIX: ${prefix}"
export composite_command="(command rocm-smi || true)"
saved_IFS=$IFS
IFS=','
read -ra node0 <<< "${BASH_REMATCH[2]}"
read -ra node1 <<< "${BASH_REMATCH[3]}"
IFS=$saved_IFS
if [[ ${#node0[@]} -ne ${#node1[@]} ]]; then
echo "Warning: node0 has ${#node0[@]} commands, node1 has ${#node1[@]}. They will be paired by index."
fi
for i in "${!node0[@]}"; do
command_node_0=$(echo "${node0[i]}" | sed 's/\"//g')
command_node_1=$(echo "${node1[i]}" | sed 's/\"//g')
step_cmd="./.buildkite/scripts/run-multi-node-test.sh /vllm-workspace/tests 2 2 ${image_name} '${command_node_0}' '${command_node_1}'"
echo "COMMANDS: ${step_cmd}"
composite_command="${composite_command} && ${step_cmd}"
done
/bin/bash -c "${composite_command}"
exit_code=$?
cleanup_network
handle_pytest_exit "$exit_code"
else
echo "Multi-node job detected but failed to parse bracket command syntax."
echo "Expected format: prefix ; [node0_cmd1, node0_cmd2] && [node1_cmd1, node1_cmd2]"
echo "Got: $commands"
cleanup_network
exit 111
fi
else
echo "--- Single-node job"
echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"
docker run \
--device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \
$RDMA_FLAGS \
--network=host \
--shm-size=16gb \
--group-add "$render_gid" \
--rm \
-e HF_TOKEN \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-v "${HF_CACHE}:${HF_MOUNT}" \
-e "HF_HOME=${HF_MOUNT}" \
-e "PYTHONPATH=${MYPYTHONPATH}" \
--name "${container_name}" \
"${image_name}" \
/bin/bash -c "${commands}"
exit_code=$?
handle_pytest_exit "$exit_code"
fi
#!/bin/bash
set -euox pipefail
export VLLM_CPU_CI_ENV=0
echo "--- PP+TP"
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -pp=2 &
server_pid=$!
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
vllm bench serve \
--backend vllm \
--dataset-name random \
--model meta-llama/Llama-3.2-3B-Instruct \
--num-prompts 20 \
--result-dir ./test_results \
--result-filename tp_pp.json \
--save-result \
--endpoint /v1/completions
kill -s SIGTERM $server_pid; wait $server_pid || true
failed_req=$(jq '.failed' ./test_results/tp_pp.json)
if [ "$failed_req" -ne 0 ]; then
echo "Some requests were failed!"
exit 1
fi
echo "--- DP+TP"
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -dp=2 &
server_pid=$!
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
vllm bench serve \
--backend vllm \
--dataset-name random \
--model meta-llama/Llama-3.2-3B-Instruct \
--num-prompts 20 \
--result-dir ./test_results \
--result-filename dp_pp.json \
--save-result \
--endpoint /v1/completions
kill -s SIGTERM $server_pid; wait $server_pid || true
failed_req=$(jq '.failed' ./test_results/dp_pp.json)
if [ "$failed_req" -ne 0 ]; then
echo "Some requests were failed!"
exit 1
fi
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
set -ex
# allow to bind to different cores
CORE_RANGE=${CORE_RANGE:-0-16}
OMP_CORE_RANGE=${OMP_CORE_RANGE:-0-16}
export CMAKE_BUILD_PARALLEL_LEVEL=16
# Setup cleanup
remove_docker_container() {
set -e;
docker rm -f cpu-test || true;
}
trap remove_docker_container EXIT
remove_docker_container
# Try building the docker image
docker build --tag cpu-test --target vllm-test -f docker/Dockerfile.cpu .
# Run the image
docker run -itd --cpuset-cpus="$CORE_RANGE" --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=16 --env VLLM_CPU_CI_ENV=1 -e E2E_OMP_THREADS="$OMP_CORE_RANGE" --shm-size=4g --name cpu-test cpu-test
function cpu_tests() {
set -e
docker exec cpu-test bash -c "
set -e
pip list"
# offline inference
docker exec cpu-test bash -c "
set -e
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
# Run model tests
docker exec cpu-test bash -c "
set -e
pytest -x -v -s tests/models/multimodal/generation/test_whisper.py -m cpu_model"
# Run kernel tests
docker exec cpu-test bash -c "
set -e
pytest -x -v -s tests/kernels/test_onednn.py
pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
pytest -x -v -s tests/kernels/moe/test_moe.py -k test_cpu_fused_moe_basic"
# basic online serving
docker exec cpu-test bash -c '
set -e
VLLM_CPU_OMP_THREADS_BIND=$E2E_OMP_THREADS vllm serve Qwen/Qwen3-0.6B --max-model-len 2048 &
server_pid=$!
timeout 600 bash -c "until curl localhost:8000/v1/models; do sleep 1; done" || exit 1
vllm bench serve \
--backend vllm \
--dataset-name random \
--model Qwen/Qwen3-0.6B \
--num-prompts 20 \
--endpoint /v1/completions
kill -s SIGTERM $server_pid &'
}
# All of CPU tests are expected to be finished less than 40 mins.
export -f cpu_tests
timeout 2h bash -c cpu_tests
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
set -ex
# Setup cleanup
remove_docker_container() {
if [[ -n "$container_id" ]]; then
podman stop --all -t0
podman rm -f "$container_id" || true
fi
podman system prune -f
}
trap remove_docker_container EXIT
remove_docker_container
# Try building the docker image
podman build -t cpu-test-ubi9-ppc -f docker/Dockerfile.ppc64le .
# Run the image
container_id=$(podman run -itd --entrypoint /bin/bash -v /tmp/:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN cpu-test-ubi9-ppc)
function cpu_tests() {
# offline inference
podman exec -it "$container_id" bash -c "
export TORCH_COMPILE_DISABLE=1
set -xve
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> "$HOME"/test_basic.log
# Run basic model test
podman exec -it "$container_id" bash -c "
export TORCH_COMPILE_DISABLE=1
set -evx
pip install pytest pytest-asyncio einops peft Pillow soundfile transformers_stream_generator matplotlib
pip install sentence-transformers datamodel_code_generator tblib
# Note: disable Bart until supports V1
# pytest -v -s tests/models/language/generation/test_bart.py -m cpu_model
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-openai-community/gpt2]
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-facebook/opt-125m]
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-google/gemma-1.1-2b-it]
pytest -v -s tests/models/language/pooling/test_classification.py::test_models[float-jason9693/Qwen2.5-1.5B-apeach]
# TODO: Below test case tests/models/language/pooling/test_embedding.py::test_models[True-ssmits/Qwen2-7B-Instruct-embed-base] fails on ppc64le. Disabling it for time being.
# pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> "$HOME"/test_rest.log
}
# All of CPU tests are expected to be finished less than 40 mins.
export container_id
export -f cpu_tests
timeout 120m bash -c cpu_tests
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
set -ex
# Setup cleanup
remove_docker_container() { docker rm -f cpu-test || true; docker system prune -f; }
trap remove_docker_container EXIT
remove_docker_container
# Try building the docker image
docker build -t cpu-test -f docker/Dockerfile.s390x .
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
set -euox pipefail
# allow to bind to different cores
CORE_RANGE=${CORE_RANGE:-48-95}
NUMA_NODE=${NUMA_NODE:-1}
IMAGE_NAME="cpu-test-$NUMA_NODE"
TIMEOUT_VAL=$1
TEST_COMMAND=$2
# building the docker image
echo "--- :docker: Building Docker image"
docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu .
# Run the image, setting --shm-size=4g for tensor parallel.
docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g "$IMAGE_NAME" \
timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}"
#!/bin/bash
# This script build the GH200 docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
set -ex
# Skip the new torch installation during build since we are using the specified version for arm64 in the Dockerfile
python3 use_existing_torch.py
# Try building the docker image
DOCKER_BUILDKIT=1 docker build . \
--file docker/Dockerfile \
--target vllm-openai \
--platform "linux/arm64" \
-t gh200-test \
--build-arg max_jobs=66 \
--build-arg nvcc_threads=2 \
--build-arg RUN_WHEEL_CHECK=false \
--build-arg torch_cuda_arch_list="9.0+PTX"
# Setup cleanup
remove_docker_container() { docker rm -f gh200-test || true; }
trap remove_docker_container EXIT
remove_docker_container
# Run the image and test offline inference
docker run -e HF_TOKEN -e VLLM_WORKER_MULTIPROC_METHOD=spawn -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
python3 examples/offline_inference/basic/generate.py --model meta-llama/Llama-3.2-1B
'
#!/bin/bash
# This script builds the HPU docker image and runs the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
#
# vllm-gaudi compatibility pinning:
# The vllm-gaudi plugin is installed on top of the vllm upstream checkout used by this CI job.
# When upstream vllm changes its API, the plugin may break before it has been updated.
# To handle this, the vllm-gaudi repository maintains a file:
# vllm/last-good-commit-for-vllm-gaudi/VLLM_COMMUNITY_COMMIT
# The first line of that file controls what version of vllm is used inside the Docker image:
# - "latest" : no checkout override; the current Buildkite CI commit is used as-is.
# - "<commit SHA>" : vllm is checked out to that specific commit before building, pinning
# the test to a known-compatible baseline.
# To unpin (resume testing against the live vllm tip), set the file content back to "latest".
set -exuo pipefail
# Fetch the vllm community commit reference from vllm-gaudi (first line only).
VLLM_COMMUNITY_COMMIT=$(curl -s \
https://raw.githubusercontent.com/vllm-project/vllm-gaudi/vllm/last-good-commit-for-vllm-gaudi/VLLM_COMMUNITY_COMMIT \
| head -1 | tr -d '\n')
echo "Using vllm community commit: ${VLLM_COMMUNITY_COMMIT}"
# Try building the docker image
image_name="hpu/upstream-vllm-ci:${BUILDKITE_COMMIT}"
container_name="hpu-upstream-vllm-ci-${BUILDKITE_COMMIT}-container"
cat <<EOF | docker build -t "${image_name}" -f - .
FROM gaudi-base-image:latest
COPY ./ /workspace/vllm
# If VLLM_COMMUNITY_COMMIT is a specific commit (not "latest"), check it out to pin vllm
# to the version known to be compatible with vllm-gaudi. When the value is "latest",
# the current checkout (the Buildkite CI commit) is used unchanged.
RUN if [ "${VLLM_COMMUNITY_COMMIT}" != "latest" ]; then \
cd /workspace/vllm && git fetch --unshallow 2>/dev/null || true && git checkout ${VLLM_COMMUNITY_COMMIT}; \
fi
WORKDIR /workspace/vllm
ENV no_proxy=localhost,127.0.0.1
ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
RUN bash -c 'pip install -r <(sed "/^torch/d" requirements/build.txt)'
RUN VLLM_TARGET_DEVICE=empty pip install --no-build-isolation -e .
RUN pip install git+https://github.com/vllm-project/vllm-gaudi.git
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
WORKDIR /workspace/
RUN git clone https://github.com/vllm-project/vllm-gaudi.git
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
EOF
# Setup cleanup
# certain versions of HPU software stack have a bug that can
# override the exit code of the script, so we need to use
# separate remove_docker_containers and remove_docker_containers_and_exit
# functions, while other platforms only need one remove_docker_container
# function.
EXITCODE=1
remove_docker_containers() { docker rm -f "${container_name}" || true; }
trap 'remove_docker_containers; exit $EXITCODE;' EXIT
remove_docker_containers
echo "Running HPU plugin v1 test"
docker run --rm --runtime=habana --name="${container_name}" --network=host \
-e HABANA_VISIBLE_DEVICES=all \
-e VLLM_SKIP_WARMUP=true \
-e PT_HPU_ENABLE_LAZY_COLLECTIVES=true \
-e PT_HPU_LAZY_MODE=1 \
"${image_name}" \
/bin/bash -c '
cd vllm; timeout 120s python -u examples/offline_inference/basic/generate.py --model facebook/opt-125m
'
EXITCODE=$?
if [ $EXITCODE -eq 0 ]; then
echo "Test with basic model passed"
else
echo "Test with basic model FAILED with exit code: $EXITCODE" >&2
fi
# The trap will handle the container removal and final exit.
\ No newline at end of file
#!/bin/bash
# This script build the Ascend NPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
set -ex
# Base ubuntu image with basic ascend development libraries and python installed
VLLM_ASCEND_REPO="https://github.com/vllm-project/vllm-ascend.git"
CONFIG_FILE_REMOTE_PATH="tests/e2e/vllm_interface/vllm_test.cfg"
TEST_RUN_CONFIG_FILE="vllm_test.cfg"
VLLM_ASCEND_TMP_DIR=
# Get the test run configuration file from the vllm-ascend repository
fetch_vllm_test_cfg() {
VLLM_ASCEND_TMP_DIR=$(mktemp -d)
# Ensure that the temporary directory is cleaned up when an exception occurs during configuration file retrieval
cleanup() {
rm -rf "${VLLM_ASCEND_TMP_DIR}"
}
trap cleanup EXIT
GIT_TRACE=1 git clone -v --depth 1 "${VLLM_ASCEND_REPO}" "${VLLM_ASCEND_TMP_DIR}"
if [ ! -f "${VLLM_ASCEND_TMP_DIR}/${CONFIG_FILE_REMOTE_PATH}" ]; then
echo "Error: file '${CONFIG_FILE_REMOTE_PATH}' does not exist in the warehouse" >&2
exit 1
fi
# If the file already exists locally, just overwrite it
cp "${VLLM_ASCEND_TMP_DIR}/${CONFIG_FILE_REMOTE_PATH}" "${TEST_RUN_CONFIG_FILE}"
echo "Copied ${CONFIG_FILE_REMOTE_PATH} to ${TEST_RUN_CONFIG_FILE}"
# Since the trap will be overwritten later, and when it is executed here, the task of cleaning up resources
# when the trap is abnormal has been completed, so the temporary resources are manually deleted here.
rm -rf "${VLLM_ASCEND_TMP_DIR}"
trap - EXIT
}
# Downloads test run configuration file from a remote URL.
# Loads the configuration into the current script environment.
get_config() {
if [ ! -f "${TEST_RUN_CONFIG_FILE}" ]; then
echo "Error: file '${TEST_RUN_CONFIG_FILE}' does not exist in the warehouse" >&2
exit 1
fi
# shellcheck source=/dev/null
source "${TEST_RUN_CONFIG_FILE}"
echo "Base docker image name that get from configuration: ${BASE_IMAGE_NAME}"
return 0
}
# get test running configuration.
fetch_vllm_test_cfg
# Check if the function call was successful. If not, exit the script.
if ! get_config; then
exit 1
fi
image_name="npu/vllm-ci:${BUILDKITE_COMMIT}_${EPOCHSECONDS}"
container_name="npu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
# BUILDKITE_AGENT_NAME format is {hostname}-{agent_idx}-{npu_card_num}cards
agent_idx=$(echo "${BUILDKITE_AGENT_NAME}" | awk -F'-' '{print $(NF-1)}')
echo "agent_idx: ${agent_idx}"
builder_name="cachebuilder${agent_idx}"
builder_cache_dir="/mnt/docker-cache${agent_idx}"
mkdir -p "${builder_cache_dir}"
# Try building the docker image
cat <<EOF | DOCKER_BUILDKIT=1 docker build \
--add-host cache-service-vllm.nginx-pypi-cache.svc.cluster.local:"${PYPI_CACHE_HOST}" \
--builder "${builder_name}" --cache-from type=local,src="${builder_cache_dir}" \
--cache-to type=local,dest="${builder_cache_dir}",mode=max \
--progress=plain --load -t "${image_name}" -f - .
FROM ${BASE_IMAGE_NAME}
# Define environments
ENV DEBIAN_FRONTEND=noninteractive
ENV SOC_VERSION="ascend910b1"
RUN pip config set global.index-url http://cache-service-vllm.nginx-pypi-cache.svc.cluster.local:${PYPI_CACHE_PORT}/pypi/simple && \
pip config set global.trusted-host cache-service-vllm.nginx-pypi-cache.svc.cluster.local && \
apt-get update -y && \
apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*
# Install for pytest to make the docker build cache layer always valid
RUN --mount=type=cache,target=/root/.cache/pip \
pip install pytest>=6.0 modelscope
WORKDIR /workspace/vllm
# Install vLLM dependencies in advance. Effect: As long as common.txt remains unchanged, the docker cache layer will be valid.
COPY requirements/common.txt /workspace/vllm/requirements/common.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements/common.txt
COPY . .
# Install vLLM
RUN --mount=type=cache,target=/root/.cache/pip \
VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
python3 -m pip uninstall -y triton
# Install vllm-ascend
WORKDIR /workspace
ARG VLLM_ASCEND_REPO=https://github.com/vllm-project/vllm-ascend.git
ARG VLLM_ASCEND_TAG=main
RUN git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf "https://github.com/" && \
git clone --depth 1 \$VLLM_ASCEND_REPO --branch \$VLLM_ASCEND_TAG /workspace/vllm-ascend
# Install vllm dependencies in advance. Effect: As long as common.txt remains unchanged, the docker cache layer will be valid.
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r /workspace/vllm-ascend/requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
python3 -m pip install -v -e /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV VLLM_USE_MODELSCOPE=True
WORKDIR /workspace/vllm-ascend
CMD ["/bin/bash"]
EOF
# Setup cleanup
remove_docker_container() {
docker rm -f "${container_name}" || true;
docker image rm -f "${image_name}" || true;
docker system prune -f || true;
}
trap remove_docker_container EXIT
# Generate corresponding --device args based on BUILDKITE_AGENT_NAME
# Ascend NPU BUILDKITE_AGENT_NAME format is {hostname}-{agent_idx}-{npu_card_num}cards, and agent_idx starts from 1.
# e.g. atlas-a2-001-1-2cards means this is the 1-th agent on atlas-a2-001 host, and it has 2 NPU cards.
# returns one argument per line: --device, /dev/davinciX, ...
parse_and_gen_devices() {
local input="$1"
local index cards_num
if [[ "$input" =~ ([0-9]+)-([0-9]+)cards$ ]]; then
index="${BASH_REMATCH[1]}"
cards_num="${BASH_REMATCH[2]}"
else
echo "parse error" >&2
return 1
fi
local i=0
while (( i < cards_num )); do
local dev_idx=$(((index - 1)*cards_num + i ))
printf '%s\n' "--device"
printf '%s\n' "/dev/davinci${dev_idx}"
((i++))
done
}
mapfile -t device_args < <(parse_and_gen_devices "${BUILDKITE_AGENT_NAME}") || exit 1
# Run the image and execute the Out-Of-Tree (OOT) platform interface test case on Ascend NPU hardware.
# This test checks whether the OOT platform interface is functioning properly in conjunction with
# the hardware plugin vllm-ascend.
model_cache_dir=/mnt/modelscope${agent_idx}
mkdir -p "${model_cache_dir}"
docker run \
"${device_args[@]}" \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
-v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-v "${model_cache_dir}":/root/.cache/modelscope \
--entrypoint="" \
--name "${container_name}" \
"${image_name}" \
bash -c '
set -e
pytest -v -s tests/e2e/vllm_interface/
'
#!/bin/bash
set -xu
remove_docker_container() {
docker rm -f tpu-test || true;
}
trap remove_docker_container EXIT
# Remove the container that might not be cleaned up in the previous run.
remove_docker_container
# Build the docker image.
docker build -f docker/Dockerfile.tpu -t vllm-tpu .
# Set up cleanup.
cleanup_docker() {
# Get Docker's root directory
docker_root=$(docker info -f '{{.DockerRootDir}}')
if [ -z "$docker_root" ]; then
echo "Failed to determine Docker root directory."
exit 1
fi
echo "Docker root directory: $docker_root"
# Check disk usage of the filesystem where Docker's root directory is located
disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
# Define the threshold
threshold=70
if [ "$disk_usage" -gt "$threshold" ]; then
echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
# Remove dangling images (those that are not tagged and not used by any container)
docker image prune -f
# Remove unused volumes / force the system prune for old images as well.
docker volume prune -f && docker system prune --force --filter "until=72h" --all
echo "Docker images and volumes cleanup completed."
else
echo "Disk usage is below $threshold%. No cleanup needed."
fi
}
cleanup_docker
# For HF_TOKEN.
source /etc/environment
docker run --privileged --net host --shm-size=16G -it \
-e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
vllm-tpu /bin/bash -c '
set -e # Exit immediately if a command exits with a non-zero status.
set -u # Treat unset variables as an error.
echo "--- Starting script inside Docker container ---"
# Create results directory
RESULTS_DIR=$(mktemp -d)
# If mktemp fails, set -e will cause the script to exit.
echo "Results will be stored in: $RESULTS_DIR"
# Install dependencies
echo "--- Installing Python dependencies ---"
python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
&& python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
&& python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
echo "--- Python dependencies installed ---"
export VLLM_XLA_CHECK_RECOMPILATION=1
export VLLM_XLA_CACHE_PATH=
echo "--- Hardware Information ---"
# tpu-info
echo "--- Starting Tests ---"
set +e
overall_script_exit_code=0
# --- Test Definitions ---
# If a test fails, this function will print logs and will not cause the main script to exit.
run_test() {
local test_num=$1
local test_name=$2
local test_command=$3
local log_file="$RESULTS_DIR/test_${test_num}.log"
local actual_exit_code
echo "--- TEST_$test_num: Running $test_name ---"
# Execute the test command.
eval "$test_command" > >(tee -a "$log_file") 2> >(tee -a "$log_file" >&2)
actual_exit_code=$?
echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" # This goes to main log
echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" >> "$log_file" # Also to per-test log
if [ "$actual_exit_code" -ne 0 ]; then
echo "TEST_$test_num ($test_name) FAILED with exit code $actual_exit_code." >&2
echo "--- Log for failed TEST_$test_num ($test_name) ---" >&2
if [ -f "$log_file" ]; then
cat "$log_file" >&2
else
echo "Log file $log_file not found for TEST_$test_num ($test_name)." >&2
fi
echo "--- End of log for TEST_$test_num ($test_name) ---" >&2
return "$actual_exit_code" # Return the failure code
else
echo "TEST_$test_num ($test_name) PASSED."
return 0 # Return success
fi
}
# Helper function to call run_test and update the overall script exit code
run_and_track_test() {
local test_num_arg="$1"
local test_name_arg="$2"
local test_command_arg="$3"
# Run the test
run_test "$test_num_arg" "$test_name_arg" "$test_command_arg"
local test_specific_exit_code=$?
# If the test failed, set the overall script exit code to 1
if [ "$test_specific_exit_code" -ne 0 ]; then
# No need for extra echo here, run_test already logged the failure.
overall_script_exit_code=1
fi
}
# --- Actual Test Execution ---
run_and_track_test 1 "test_struct_output_generate.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
run_and_track_test 2 "test_moe_pallas.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
run_and_track_test 3 "test_lora.py" \
"VLLM_XLA_CHECK_RECOMPILATION=0 python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/test_lora.py"
run_and_track_test 4 "test_tpu_qkv_linear.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_tpu_qkv_linear.py"
run_and_track_test 5 "test_spmd_model_weight_loading.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_spmd_model_weight_loading.py"
run_and_track_test 6 "test_kv_cache_update_kernel.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_kv_cache_update_kernel.py"
run_and_track_test 7 "test_tpu_int8.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_tpu_int8.py"
# After all tests have been attempted, exit with the overall status.
if [ "$overall_script_exit_code" -ne 0 ]; then
echo "--- One or more tests FAILED. Overall script exiting with failure code 1. ---"
else
echo "--- All tests have completed and PASSED. Overall script exiting with success code 0. ---"
fi
exit "$overall_script_exit_code"
' # IMPORTANT: This is the closing single quote for the bash -c "..." command. Ensure it is present and correct.
# Capture the exit code of the docker run command
DOCKER_RUN_EXIT_CODE=$?
# The trap will run for cleanup.
# Exit the main script with the Docker run command's exit code.
if [ "$DOCKER_RUN_EXIT_CODE" -ne 0 ]; then
echo "Docker run command failed with exit code $DOCKER_RUN_EXIT_CODE."
exit "$DOCKER_RUN_EXIT_CODE"
else
echo "Docker run command completed successfully."
exit 0
fi
# TODO: This test fails because it uses RANDOM_SEED sampling
# pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
#!/bin/bash
set -xu
remove_docker_container() {
docker rm -f tpu-test || true;
}
trap remove_docker_container EXIT
# Remove the container that might not be cleaned up in the previous run.
remove_docker_container
# Build the docker image.
docker build -f docker/Dockerfile.tpu -t vllm-tpu .
# Set up cleanup.
cleanup_docker() {
# Get Docker's root directory
docker_root=$(docker info -f '{{.DockerRootDir}}')
if [ -z "$docker_root" ]; then
echo "Failed to determine Docker root directory."
exit 1
fi
echo "Docker root directory: $docker_root"
# Check disk usage of the filesystem where Docker's root directory is located
disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
# Define the threshold
threshold=70
if [ "$disk_usage" -gt "$threshold" ]; then
echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
# Remove dangling images (those that are not tagged and not used by any container)
docker image prune -f
# Remove unused volumes / force the system prune for old images as well.
docker volume prune -f && docker system prune --force --filter "until=72h" --all
echo "Docker images and volumes cleanup completed."
else
echo "Disk usage is below $threshold%. No cleanup needed."
fi
}
cleanup_docker
# For HF_TOKEN.
source /etc/environment
docker run --privileged --net host --shm-size=16G -it \
-e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
vllm-tpu /bin/bash -c '
set -e # Exit immediately if a command exits with a non-zero status.
set -u # Treat unset variables as an error.
echo "--- Starting script inside Docker container ---"
# Create results directory
RESULTS_DIR=$(mktemp -d)
# If mktemp fails, set -e will cause the script to exit.
echo "Results will be stored in: $RESULTS_DIR"
# Install dependencies
echo "--- Installing Python dependencies ---"
python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
&& python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
&& python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
echo "--- Python dependencies installed ---"
export VLLM_XLA_CHECK_RECOMPILATION=1
export VLLM_XLA_CACHE_PATH=
echo "--- Hardware Information ---"
# tpu-info
echo "--- Starting Tests ---"
set +e
overall_script_exit_code=0
# --- Test Definitions ---
# If a test fails, this function will print logs and will not cause the main script to exit.
run_test() {
local test_num=$1
local test_name=$2
local test_command=$3
local log_file="$RESULTS_DIR/test_${test_num}.log"
local actual_exit_code
echo "--- TEST_$test_num: Running $test_name ---"
# Execute the test command.
eval "$test_command" > >(tee -a "$log_file") 2> >(tee -a "$log_file" >&2)
actual_exit_code=$?
echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" # This goes to main log
echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" >> "$log_file" # Also to per-test log
if [ "$actual_exit_code" -ne 0 ]; then
echo "TEST_$test_num ($test_name) FAILED with exit code $actual_exit_code." >&2
echo "--- Log for failed TEST_$test_num ($test_name) ---" >&2
if [ -f "$log_file" ]; then
cat "$log_file" >&2
else
echo "Log file $log_file not found for TEST_$test_num ($test_name)." >&2
fi
echo "--- End of log for TEST_$test_num ($test_name) ---" >&2
return "$actual_exit_code" # Return the failure code
else
echo "TEST_$test_num ($test_name) PASSED."
return 0 # Return success
fi
}
# Helper function to call run_test and update the overall script exit code
run_and_track_test() {
local test_num_arg="$1"
local test_name_arg="$2"
local test_command_arg="$3"
# Run the test
run_test "$test_num_arg" "$test_name_arg" "$test_command_arg"
local test_specific_exit_code=$?
# If the test failed, set the overall script exit code to 1
if [ "$test_specific_exit_code" -ne 0 ]; then
# No need for extra echo here, run_test already logged the failure.
overall_script_exit_code=1
fi
}
# --- Actual Test Execution ---
run_and_track_test 0 "test_perf.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_perf.py"
run_and_track_test 1 "test_compilation.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py"
run_and_track_test 2 "test_basic.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py"
run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \
"python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
run_and_track_test 4 "test_quantization_accuracy.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py"
run_and_track_test 5 "examples/offline_inference/tpu.py" \
"python3 /workspace/vllm/examples/offline_inference/tpu.py"
run_and_track_test 6 "test_tpu_model_runner.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py"
run_and_track_test 7 "test_sampler.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py"
run_and_track_test 8 "test_topk_topp_sampler.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py"
run_and_track_test 9 "test_multimodal.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py"
run_and_track_test 10 "test_pallas.py" \
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py"
# After all tests have been attempted, exit with the overall status.
if [ "$overall_script_exit_code" -ne 0 ]; then
echo "--- One or more tests FAILED. Overall script exiting with failure code 1. ---"
else
echo "--- All tests have completed and PASSED. Overall script exiting with success code 0. ---"
fi
exit "$overall_script_exit_code"
' # IMPORTANT: This is the closing single quote for the bash -c "..." command. Ensure it is present and correct.
# Capture the exit code of the docker run command
DOCKER_RUN_EXIT_CODE=$?
# The trap will run for cleanup.
# Exit the main script with the Docker run command's exit code.
if [ "$DOCKER_RUN_EXIT_CODE" -ne 0 ]; then
echo "Docker run command failed with exit code $DOCKER_RUN_EXIT_CODE."
exit "$DOCKER_RUN_EXIT_CODE"
else
echo "Docker run command completed successfully."
exit 0
fi
# TODO: This test fails because it uses RANDOM_SEED sampling
# pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
set -ex
image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}"
container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
# Try building the docker image
docker build -t "${image_name}" -f docker/Dockerfile.xpu .
# Setup cleanup
remove_docker_container() {
docker rm -f "${container_name}" || true;
docker image rm -f "${image_name}" || true;
docker system prune -f || true;
}
trap remove_docker_container EXIT
# Run the image and test offline inference/tensor parallel
docker run \
--device /dev/dri:/dev/dri \
--net=host \
--ipc=host \
--privileged \
-v /dev/dri/by-path:/dev/dri/by-path \
--entrypoint="" \
-e "HF_TOKEN=${HF_TOKEN}" \
-e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK}" \
--name "${container_name}" \
"${image_name}" \
bash -c '
set -e
echo $ZE_AFFINITY_MASK
pip install tblib==3.1.0
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 -O3 -cc.cudagraph_mode=NONE
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend ray
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend mp
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --attention-backend=TRITON_ATTN
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --quantization fp8
python3 examples/offline_inference/basic/generate.py --model superjob/Qwen3-4B-Instruct-2507-GPTQ-Int4 --block-size 64 --enforce-eager
python3 examples/offline_inference/basic/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2
python3 examples/offline_inference/basic/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 --enable-expert-parallel
cd tests
pytest -v -s v1/core --ignore=v1/core/test_reset_prefix_cache_e2e.py --ignore=v1/core/test_scheduler_e2e.py
pytest -v -s v1/engine
pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py
pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py
pytest -v -s v1/structured_output
pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py --ignore=v1/spec_decode/test_acceptance_length.py
pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_example_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py
pytest -v -s v1/test_serial_utils.py
'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment