Unverified Commit 067c5d9a authored by Andreas Karatzas's avatar Andreas Karatzas Committed by GitHub
Browse files

[ROCm][CI] Added MI325 mirrors (#34923)


Signed-off-by: default avatarAndreas Karatzas <akaratza@amd.com>
parent f5972a87
#!/bin/bash #!/bin/bash
# This script runs test inside the corresponding ROCm docker container. # This script runs tests inside the corresponding ROCm docker container.
# It handles both single-node and multi-node test configurations.
#
# Multi-node detection: Instead of matching on fragile group names, we detect
# multi-node jobs structurally by looking for the bracket command syntax
# "[node0_cmds] && [node1_cmds]" or via the NUM_NODES environment variable.
set -o pipefail set -o pipefail
# Export Python path # Export Python path
export PYTHONPATH=".." export PYTHONPATH=".."
# Print ROCm version ###############################################################################
echo "--- Confirming Clean Initial State" # Helper Functions
while true; do ###############################################################################
sleep 3
if grep -q clean /opt/amdgpu/etc/gpu_state; then wait_for_clean_gpus() {
echo "GPUs state is \"clean\"" local timeout=${1:-300}
break local start=$SECONDS
fi echo "--- Waiting for clean GPU state (timeout: ${timeout}s)"
done while true; do
if grep -q clean /opt/amdgpu/etc/gpu_state; then
echo "--- ROCm info" echo "GPUs state is \"clean\""
rocminfo return
fi
if (( SECONDS - start >= timeout )); then
echo "Error: GPUs did not reach clean state within ${timeout}s" >&2
exit 1
fi
sleep 3
done
}
# cleanup older docker images
cleanup_docker() { cleanup_docker() {
# Get Docker's root directory # Get Docker's root directory
docker_root=$(docker info -f '{{.DockerRootDir}}') docker_root=$(docker info -f '{{.DockerRootDir}}')
...@@ -28,15 +40,12 @@ cleanup_docker() { ...@@ -28,15 +40,12 @@ cleanup_docker() {
exit 1 exit 1
fi fi
echo "Docker root directory: $docker_root" echo "Docker root directory: $docker_root"
# Check disk usage of the filesystem where Docker's root directory is located
disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//') disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
# Define the threshold
threshold=70 threshold=70
if [ "$disk_usage" -gt "$threshold" ]; then if [ "$disk_usage" -gt "$threshold" ]; then
echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..." echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
# Remove dangling images (those that are not tagged and not used by any container)
docker image prune -f docker image prune -f
# Remove unused volumes / force the system prune for old images as well.
docker volume prune -f && docker system prune --force --filter "until=72h" --all docker volume prune -f && docker system prune --force --filter "until=72h" --all
echo "Docker images and volumes cleanup completed." echo "Docker images and volumes cleanup completed."
else else
...@@ -45,193 +54,258 @@ cleanup_docker() { ...@@ -45,193 +54,258 @@ cleanup_docker() {
} }
cleanup_network() { cleanup_network() {
for node in $(seq 0 $((NUM_NODES-1))); do local max_nodes=${NUM_NODES:-2}
if docker pr -a -q -f name="node${node}" | grep -q .; then for node in $(seq 0 $((max_nodes - 1))); do
docker stop "node${node}" if docker ps -a -q -f name="node${node}" | grep -q .; then
docker stop "node${node}" || true
fi fi
done done
if docker network ls | grep docker-net; then if docker network ls | grep -q docker-net; then
docker network rm docker-net docker network rm docker-net || true
fi fi
} }
# Call the cleanup docker function is_multi_node() {
local cmds="$1"
# Primary signal: NUM_NODES environment variable set by the pipeline
if [[ "${NUM_NODES:-1}" -gt 1 ]]; then
return 0
fi
# Fallback: detect the bracket syntax structurally
# Pattern: [...] && [...] (per-node command arrays)
if [[ "$cmds" =~ \[.*\].*\&\&.*\[.*\] ]]; then
return 0
fi
return 1
}
###############################################################################
# Pytest marker re-quoting
#
# When commands are passed through Buildkite -> shell -> $* -> bash -c,
# quotes around pytest -m marker expressions get stripped:
# pytest -v -s -m 'not cpu_test' v1/core
# becomes:
# pytest -v -s -m not cpu_test v1/core
#
# pytest then interprets "cpu_test" as a file path, not part of the marker.
# This function detects unquoted multi-word marker expressions and re-quotes
# them so they survive the final bash -c expansion.
###############################################################################
re_quote_pytest_markers() {
  # Put back the single quotes around a pytest "-m not <marker>" expression
  # that were lost while the command string travelled through "$*".
  #
  # Arguments:
  #   $1 - full command string (may contain several chained commands)
  # Outputs:
  #   The command string on stdout. Every unquoted
  #     -m not <identifier> [and|or [not] <identifier>]...
  #   is rewritten to
  #     -m 'not <identifier> [and|or [not] <identifier>]...'
  #   Expressions that are already quoted do not match and pass through
  #   unchanged.
  local cmds="$1"
  # A pytest marker name: letters, digits and underscores, not digit-first.
  local ident='[a-zA-Z_][a-zA-Z0-9_]*'

  # - "(^|[[:space:]])" anchors -m at a token boundary so that a "-m" in the
  #   middle of another word is never rewritten.
  # - The trailing group absorbs "and"/"or" continuations so the whole
  #   expression ends up inside one pair of quotes instead of only its
  #   first term.
  # - printf is used rather than echo so that an input such as "-n" is
  #   passed through as data instead of being taken as an echo option.
  printf '%s\n' "$cmds" \
    | sed -E "s/(^|[[:space:]])-m not (${ident})(( (and|or) (not )?${ident})*)/\1-m 'not \2\3'/g"
}
###############################################################################
# ROCm-specific pytest command rewrites
#
# These apply ignore flags and environment overrides for tests that are not
# yet supported or behave differently on ROCm hardware. Kept as a single
# function so new exclusions are easy to add in one place.
###############################################################################
apply_rocm_test_overrides() {
  # Rewrite a test command string with the ROCm-specific pytest exclusions
  # and environment overrides.
  #
  # Arguments:
  #   $1 - full command string
  # Outputs:
  #   The rewritten command string on stdout.
  local cmds="$1"
  local target swap spec suite name
  local -a names

  # --- Model registry filter ---
  # Deselect the listed architectures with a -k expression.
  target="pytest -v -s models/test_registry.py"
  swap="${target} -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"
  cmds=${cmds//"$target"/"$swap"}

  # --- LoRA: disable custom paged attention ---
  target="pytest -v -s lora"
  cmds=${cmds//"$target"/"VLLM_ROCM_CUSTOM_PAGED_ATTN=0 ${target}"}

  # --- Kernel ignores ---
  # Each row is "<suite dir> <test file>...". When " <suite dir>" occurs
  # anywhere in the command string, one --ignore flag per listed file is
  # appended to the END of the whole string.
  for spec in \
    "kernels/core test_fused_quant_layernorm.py test_permute_cols.py" \
    "kernels/attention test_attention_selector.py test_encoder_decoder_attn.py test_flash_attn.py test_flashinfer.py test_prefix_prefill.py test_cascade_flash_attn.py test_mha_attn.py test_lightning_attn.py test_attention.py" \
    "kernels/quantization test_int8_quant.py test_machete_mm.py test_block_fp8.py test_block_int8.py test_marlin_gemm.py test_cutlass_scaled_mm.py test_int8_kernel.py" \
    "kernels/mamba test_mamba_mixer2.py test_causal_conv1d.py test_mamba_ssm_ssd.py" \
    "kernels/moe test_moe.py test_cutlass_moe.py test_triton_moe_ptpc_fp8.py"
  do
    IFS=' ' read -ra names <<< "$spec"
    suite=${names[0]}
    [[ $cmds == *" ${suite}"* ]] || continue
    for name in "${names[@]:1}"; do
      cmds+=" --ignore=${suite}/${name}"
    done
  done

  # --- Entrypoint ignores ---
  # Unlike the kernel suites, these flags are spliced in right after the
  # suite path, and only where the path is surrounded by spaces (i.e. it is
  # followed by further arguments).
  for spec in \
    "entrypoints/openai test_audio.py test_shutdown.py test_completion.py test_models.py test_lora_adapters.py test_return_tokens_as_ids.py test_root_path.py test_tokenization.py test_prompt_validation.py" \
    "entrypoints/llm test_chat.py test_accuracy.py test_init.py test_prompt_validation.py"
  do
    IFS=' ' read -ra names <<< "$spec"
    suite=${names[0]}
    target=" ${suite} "
    swap=" ${suite}"
    for name in "${names[@]:1}"; do
      swap+=" --ignore=${suite}/${name}"
    done
    cmds=${cmds//"$target"/"${swap} "}
  done

  # Clean up escaped newlines from --ignore appends
  cmds=$(echo "$cmds" | sed 's/ \\ / /g')
  echo "$cmds"
}
###############################################################################
# Main
###############################################################################
# --- GPU initialization ---
echo "--- Confirming Clean Initial State"
wait_for_clean_gpus
echo "--- ROCm info"
rocminfo
# --- Docker housekeeping ---
cleanup_docker cleanup_docker
echo "--- Resetting GPUs" echo "--- Resetting GPUs"
echo "reset" > /opt/amdgpu/etc/gpu_state echo "reset" > /opt/amdgpu/etc/gpu_state
wait_for_clean_gpus
while true; do # --- Pull test image ---
sleep 3
if grep -q clean /opt/amdgpu/etc/gpu_state; then
echo "GPUs state is \"clean\""
break
fi
done
echo "--- Pulling container" echo "--- Pulling container"
image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}" image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}"
container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)" container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
docker pull "${image_name}" docker pull "${image_name}"
remove_docker_container() { remove_docker_container() {
docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true
} }
trap remove_docker_container EXIT trap remove_docker_container EXIT
# --- Prepare commands ---
echo "--- Running container" echo "--- Running container"
HF_CACHE="$(realpath ~)/huggingface" HF_CACHE="$(realpath ~)/huggingface"
mkdir -p "${HF_CACHE}" mkdir -p "${HF_CACHE}"
HF_MOUNT="/root/.cache/huggingface" HF_MOUNT="/root/.cache/huggingface"
commands=$@ commands="$*"
echo "Raw commands: $commands" echo "Raw commands: $commands"
commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"pytest -v -s basic_correctness/test_basic_correctness.py"} # Fix quoting before ROCm overrides (so overrides see correct structure)
commands=$(re_quote_pytest_markers "$commands")
if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then commands=$(apply_rocm_test_overrides "$commands")
commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"}
fi
commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"pytest -v -s compile/test_basic_correctness.py"}
if [[ $commands == *"pytest -v -s lora"* ]]; then
commands=${commands//"pytest -v -s lora"/"VLLM_ROCM_CUSTOM_PAGED_ATTN=0 pytest -v -s lora"}
fi
#ignore certain kernels tests
if [[ $commands == *" kernels/core"* ]]; then
commands="${commands} \
--ignore=kernels/core/test_fused_quant_layernorm.py \
--ignore=kernels/core/test_permute_cols.py"
fi
if [[ $commands == *" kernels/attention"* ]]; then
commands="${commands} \
--ignore=kernels/attention/test_attention_selector.py \
--ignore=kernels/attention/test_encoder_decoder_attn.py \
--ignore=kernels/attention/test_flash_attn.py \
--ignore=kernels/attention/test_flashinfer.py \
--ignore=kernels/attention/test_prefix_prefill.py \
--ignore=kernels/attention/test_cascade_flash_attn.py \
--ignore=kernels/attention/test_mha_attn.py \
--ignore=kernels/attention/test_lightning_attn.py \
--ignore=kernels/attention/test_attention.py"
fi
if [[ $commands == *" kernels/quantization"* ]]; then
commands="${commands} \
--ignore=kernels/quantization/test_int8_quant.py \
--ignore=kernels/quantization/test_machete_mm.py \
--ignore=kernels/quantization/test_block_fp8.py \
--ignore=kernels/quantization/test_block_int8.py \
--ignore=kernels/quantization/test_marlin_gemm.py \
--ignore=kernels/quantization/test_cutlass_scaled_mm.py \
--ignore=kernels/quantization/test_int8_kernel.py"
fi
if [[ $commands == *" kernels/mamba"* ]]; then
commands="${commands} \
--ignore=kernels/mamba/test_mamba_mixer2.py \
--ignore=kernels/mamba/test_causal_conv1d.py \
--ignore=kernels/mamba/test_mamba_ssm_ssd.py"
fi
if [[ $commands == *" kernels/moe"* ]]; then
commands="${commands} \
--ignore=kernels/moe/test_moe.py \
--ignore=kernels/moe/test_cutlass_moe.py \
--ignore=kernels/moe/test_triton_moe_ptpc_fp8.py"
fi
#ignore certain Entrypoints/openai tests
if [[ $commands == *" entrypoints/openai "* ]]; then
commands=${commands//" entrypoints/openai "/" entrypoints/openai \
--ignore=entrypoints/openai/test_audio.py \
--ignore=entrypoints/openai/test_shutdown.py \
--ignore=entrypoints/openai/test_completion.py \
--ignore=entrypoints/openai/test_models.py \
--ignore=entrypoints/openai/test_lora_adapters.py \
--ignore=entrypoints/openai/test_return_tokens_as_ids.py \
--ignore=entrypoints/openai/test_root_path.py \
--ignore=entrypoints/openai/test_tokenization.py \
--ignore=entrypoints/openai/test_prompt_validation.py "}
fi
#ignore certain Entrypoints/llm tests
if [[ $commands == *" entrypoints/llm "* ]]; then
commands=${commands//" entrypoints/llm "/" entrypoints/llm \
--ignore=entrypoints/llm/test_chat.py \
--ignore=entrypoints/llm/test_accuracy.py \
--ignore=entrypoints/llm/test_init.py \
--ignore=entrypoints/llm/test_prompt_validation.py "}
fi
commands=$(echo "$commands" | sed 's/ \\ / /g')
echo "Final commands: $commands" echo "Final commands: $commands"
# --ignore=entrypoints/openai/test_encoder_decoder.py \
# --ignore=entrypoints/openai/test_embedding.py \
# --ignore=entrypoints/openai/test_oot_registration.py
# --ignore=entrypoints/openai/test_accuracy.py \
# --ignore=entrypoints/openai/test_models.py <= Fails on MI250 but passes on MI300 as of 2025-03-13
MYPYTHONPATH=".." MYPYTHONPATH=".."
# Test that we're launching on the machine that has # Verify GPU access
# proper access to GPUs
render_gid=$(getent group render | cut -d: -f3) render_gid=$(getent group render | cut -d: -f3)
if [[ -z "$render_gid" ]]; then if [[ -z "$render_gid" ]]; then
echo "Error: 'render' group not found. This is required for GPU access." >&2 echo "Error: 'render' group not found. This is required for GPU access." >&2
exit 1 exit 1
fi fi
if [[ $commands == *"VLLM_TEST_GROUP_NAME=mi325_4-2-node-tests-4-gpus-in-total"* ]]; then # --- Route: multi-node vs single-node ---
if is_multi_node "$commands"; then
echo "--- Multi-node job detected"
export DCKR_VER=$(docker --version | sed 's/Docker version \(.*\), build .*/\1/') export DCKR_VER=$(docker --version | sed 's/Docker version \(.*\), build .*/\1/')
if [[ "$commands" =~ ^(.*)"["(.*)"] && ["(.*)"]"$ ]]; then # Parse the bracket syntax: prefix ; [node0_cmds] && [node1_cmds]
prefix=$( echo "${BASH_REMATCH[1]}" | sed 's/;//g') # BASH_REMATCH[1] = prefix (everything before first bracket)
echo "PREFIX: ${prefix}" # BASH_REMATCH[2] = comma-separated node0 commands
export composite_command="(command rocm-smi || true)" # BASH_REMATCH[3] = comma-separated node1 commands
myIFS=$IFS if [[ "$commands" =~ ^(.*)\[(.*)"] && ["(.*)\]$ ]]; then
IFS=',' prefix=$(echo "${BASH_REMATCH[1]}" | sed 's/;//g')
read -ra node0 <<< ${BASH_REMATCH[2]} echo "PREFIX: ${prefix}"
read -ra node1 <<< ${BASH_REMATCH[3]}
IFS=$myIFS export composite_command="(command rocm-smi || true)"
for i in "${!node0[@]}";do saved_IFS=$IFS
command_node_0=$(echo ${node0[i]} | sed 's/\"//g') IFS=','
command_node_1=$(echo ${node1[i]} | sed 's/\"//g') read -ra node0 <<< "${BASH_REMATCH[2]}"
read -ra node1 <<< "${BASH_REMATCH[3]}"
export commands="./.buildkite/scripts/run-multi-node-test.sh /vllm-workspace/tests 2 2 ${image_name} '${command_node_0}' '${command_node_1}'" IFS=$saved_IFS
echo "COMMANDS: ${commands}"
composite_command=$(echo "${composite_command} && ${commands}") if [[ ${#node0[@]} -ne ${#node1[@]} ]]; then
done echo "Warning: node0 has ${#node0[@]} commands, node1 has ${#node1[@]}. They will be paired by index."
/bin/bash -c "${composite_command}" fi
cleanup_network
for i in "${!node0[@]}"; do
command_node_0=$(echo "${node0[i]}" | sed 's/\"//g')
command_node_1=$(echo "${node1[i]}" | sed 's/\"//g')
step_cmd="./.buildkite/scripts/run-multi-node-test.sh /vllm-workspace/tests 2 2 ${image_name} '${command_node_0}' '${command_node_1}'"
echo "COMMANDS: ${step_cmd}"
composite_command="${composite_command} && ${step_cmd}"
done
/bin/bash -c "${composite_command}"
cleanup_network
else else
echo "Failed to parse node commands! Exiting." echo "Multi-node job detected but failed to parse bracket command syntax."
cleanup_network echo "Expected format: prefix ; [node0_cmd1, node0_cmd2] && [node1_cmd1, node1_cmd2]"
exit 111 echo "Got: $commands"
cleanup_network
exit 111
fi fi
else else
echo "--- Single-node job"
echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES" echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"
docker run \ docker run \
--device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \ --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \
--network=host \ --network=host \
--shm-size=16gb \ --shm-size=16gb \
--group-add "$render_gid" \ --group-add "$render_gid" \
--rm \ --rm \
-e HF_TOKEN \ -e HF_TOKEN \
-e AWS_ACCESS_KEY_ID \ -e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \ -e AWS_SECRET_ACCESS_KEY \
-v "${HF_CACHE}:${HF_MOUNT}" \ -v "${HF_CACHE}:${HF_MOUNT}" \
-e "HF_HOME=${HF_MOUNT}" \ -e "HF_HOME=${HF_MOUNT}" \
-e "PYTHONPATH=${MYPYTHONPATH}" \ -e "PYTHONPATH=${MYPYTHONPATH}" \
--name "${container_name}" \ --name "${container_name}" \
"${image_name}" \ "${image_name}" \
/bin/bash -c "${commands}" /bin/bash -c "${commands}"
fi fi
...@@ -28,3 +28,11 @@ steps: ...@@ -28,3 +28,11 @@ steps:
- pytest -v -s v1/engine/test_preprocess_error_handling.py - pytest -v -s v1/engine/test_preprocess_error_handling.py
# Run the rest of v1/engine tests # Run the rest of v1/engine tests
- pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py - pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
mirror:
amd:
device: mi325_8
depends_on:
- image-build-amd
commands:
- pytest -v -s v1/e2e
- pytest -v -s v1/engine
...@@ -24,11 +24,6 @@ steps: ...@@ -24,11 +24,6 @@ steps:
- pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
- pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
- pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
- label: Entrypoints Integration (API Server 1) - label: Entrypoints Integration (API Server 1)
timeout_in_minutes: 130 timeout_in_minutes: 130
...@@ -65,6 +60,11 @@ steps: ...@@ -65,6 +60,11 @@ steps:
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/pooling - pytest -v -s entrypoints/pooling
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
- label: Entrypoints Integration (Responses API) - label: Entrypoints Integration (Responses API)
timeout_in_minutes: 50 timeout_in_minutes: 50
......
...@@ -16,6 +16,7 @@ steps: ...@@ -16,6 +16,7 @@ steps:
- pytest -v -s v1/sample - pytest -v -s v1/sample
- pytest -v -s v1/logits_processors - pytest -v -s v1/logits_processors
- pytest -v -s v1/worker - pytest -v -s v1/worker
# TODO: create another `optional` test group for slow tests
- pytest -v -s -m 'not slow_test' v1/spec_decode - pytest -v -s -m 'not slow_test' v1/spec_decode
- pytest -v -s -m 'not cpu_test' v1/kv_connector/unit - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
- pytest -v -s -m 'not cpu_test' v1/metrics - pytest -v -s -m 'not cpu_test' v1/metrics
...@@ -25,6 +26,11 @@ steps: ...@@ -25,6 +26,11 @@ steps:
# Integration test for streaming correctness (requires special branch). # Integration test for streaming correctness (requires special branch).
- pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
- pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
- label: V1 Others (CPU) - label: V1 Others (CPU)
depends_on: depends_on:
......
...@@ -55,6 +55,15 @@ steps: ...@@ -55,6 +55,15 @@ steps:
- uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0' - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2' - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
- pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)' - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
commands:
- uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
- pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
- label: Language Models Test (PPL) - label: Language Models Test (PPL)
timeout_in_minutes: 110 timeout_in_minutes: 110
...@@ -73,6 +82,11 @@ steps: ...@@ -73,6 +82,11 @@ steps:
- tests/models/language/pooling - tests/models/language/pooling
commands: commands:
- pytest -v -s models/language/pooling -m 'not core_model' - pytest -v -s models/language/pooling -m 'not core_model'
mirror:
amd:
device: mi325_1
depends_on:
- image-build-amd
- label: Language Models Test (MTEB) - label: Language Models Test (MTEB)
timeout_in_minutes: 110 timeout_in_minutes: 110
......
...@@ -305,6 +305,14 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \ ...@@ -305,6 +305,14 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \ RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
uv pip install --system /rixl_install/*.whl uv pip install --system /rixl_install/*.whl
# RIXL/MoRIIO runtime dependencies (RDMA userspace libraries)
RUN apt-get update -q -y && apt-get install -q -y \
librdmacm1 \
libibverbs1 \
ibverbs-providers \
ibverbs-utils \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /vllm-workspace WORKDIR /vllm-workspace
ARG COMMON_WORKDIR ARG COMMON_WORKDIR
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace
...@@ -330,6 +338,11 @@ RUN bash /tmp/install_torchcodec.sh \ ...@@ -330,6 +338,11 @@ RUN bash /tmp/install_torchcodec.sh \
# Copy in the v1 package (for python-only install test group) # Copy in the v1 package (for python-only install test group)
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
# Set MIOPEN ENVS to resolve performance regressions in MIOpen 3D convolution kernel
# See: https://github.com/pytorch/pytorch/issues/169857
ENV MIOPEN_DEBUG_CONV_DIRECT=0
ENV MIOPEN_DEBUG_CONV_GEMM=0
# Source code is used in the `python_only_compile.sh` test # Source code is used in the `python_only_compile.sh` test
# We hide it inside `src/` so that this source code # We hide it inside `src/` so that this source code
# will not be imported by other tests # will not be imported by other tests
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import importlib.util import importlib.util
import os import os
import subprocess
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import msgspec import msgspec
...@@ -40,6 +41,19 @@ from .utils import create_request, create_scheduler ...@@ -40,6 +41,19 @@ from .utils import create_request, create_scheduler
aiter_available = importlib.util.find_spec("aiter") is not None aiter_available = importlib.util.find_spec("aiter") is not None
mori_available = importlib.util.find_spec("mori") is not None mori_available = importlib.util.find_spec("mori") is not None
def _rdma_available() -> bool:
"""Check if RDMA devices are available."""
try:
result = subprocess.run(["ibv_devinfo"], capture_output=True, text=True)
return "No IB devices found" not in result.stderr
except FileNotFoundError:
return False
rdma_available = _rdma_available()
pytestmark = pytest.mark.skipif( pytestmark = pytest.mark.skipif(
not (current_platform.is_rocm() and mori_available), not (current_platform.is_rocm() and mori_available),
reason="MoRIIOs are only available on ROCm with aiter package installed", reason="MoRIIOs are only available on ROCm with aiter package installed",
...@@ -393,6 +407,7 @@ def test_read_mode_loads_remote_block_ids(moriio_read_mode): ...@@ -393,6 +407,7 @@ def test_read_mode_loads_remote_block_ids(moriio_read_mode):
@pytest.mark.skipif( @pytest.mark.skipif(
not aiter_available, reason="Requires aiter package for ROCm FlashAttention backend" not aiter_available, reason="Requires aiter package for ROCm FlashAttention backend"
) )
@pytest.mark.skipif(not rdma_available, reason="No RDMA devices available")
def test_register_kv_caches(mock_parallel_groups): def test_register_kv_caches(mock_parallel_groups):
"""Test that MoRIIOConnector.register_kv_caches correctly registers kv caches.""" """Test that MoRIIOConnector.register_kv_caches correctly registers kv caches."""
ROLE = "kv_consumer" ROLE = "kv_consumer"
...@@ -488,6 +503,7 @@ def test_register_kv_caches(mock_parallel_groups): ...@@ -488,6 +503,7 @@ def test_register_kv_caches(mock_parallel_groups):
@pytest.mark.skipif( @pytest.mark.skipif(
not aiter_available, reason="Requires aiter package for ROCm FlashAttention backend" not aiter_available, reason="Requires aiter package for ROCm FlashAttention backend"
) )
@pytest.mark.skipif(not rdma_available, reason="No RDMA devices available")
def test_moriio_handshake_returns_metadata(mock_parallel_groups): def test_moriio_handshake_returns_metadata(mock_parallel_groups):
"""MoRIIO handshake socket returns valid agent metadata over ZMQ.""" """MoRIIO handshake socket returns valid agent metadata over ZMQ."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment