Unverified Commit b2a55186 authored by xiangdong's avatar xiangdong Committed by GitHub
Browse files

[XPU][CI] Add misc, engine and lora cases on Intel GPU in CI (#39887)


Signed-off-by: default avatarzengxian <xiangdong.zeng@intel.com>
Co-authored-by: default avatarKunshang Ji <kunshang.ji@intel.com>
parent 908a7134
group: Engine Intel
depends_on:
- image-build-xpu
steps:
- label: Engine (1 GPU)
timeout_in_minutes: 30
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/v1/engine/
- tests/v1/engine/
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py'
group: Kernels Intel
depends_on:
- image-build-xpu
steps:
- label: vLLM IR Tests
timeout_in_minutes: 30
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/ir
- vllm/kernels
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
pytest -v -s kernels/ir'
group: LoRA Intel
depends_on:
- image-build-xpu
steps:
- label: LoRA Runtime + Utils
timeout_in_minutes: 45
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/lora
- tests/lora
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
pytest -v -s lora/test_layers.py &&
pytest -v -s lora/test_lora_checkpoints.py &&
pytest -v -s lora/test_lora_functions.py &&
pytest -v -s lora/test_lora_huggingface.py &&
pytest -v -s lora/test_lora_manager.py &&
pytest -v -s lora/test_lora_utils.py &&
pytest -v -s lora/test_peft_helper.py &&
pytest -v -s lora/test_resolver.py &&
pytest -v -s lora/test_utils.py &&
(pytest -v -s lora/test_add_lora.py --deselect="tests/lora/test_add_lora.py::test_add_lora" || true) &&
(pytest -v -s lora/test_worker.py --deselect="tests/lora/test_worker.py::test_worker_apply_lora" || true)'
- label: LoRA Fused/MoE Kernels
timeout_in_minutes: 45
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/lora
- tests/lora
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
pytest -v -s lora/test_fused_moe_lora_kernel.py &&
pytest -v -s lora/test_moe_lora_align_sum.py'
- label: LoRA Punica Kernels
timeout_in_minutes: 45
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/lora
- tests/lora
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
set -o pipefail &&
pytest -v -s lora/test_punica_ops.py --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-2-2049-64-32-32]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype1-2-64000-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-128-1-32]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-256-1-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-256-8-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels[expand-0-xpu:0-dtype0-3-2049-128-8-16]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-128-8-32]" --deselect="tests/lora/test_punica_ops.py::test_kernels[expand-0-xpu:0-dtype1-1-2049-256-128-32]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype0-3-64256-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype1-2-29696-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype1-3-49408-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype0-2-16384-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype0-2-51328-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype1-1-102656-32-4-4]"'
- label: LoRA Punica FP8/XPU Ops
timeout_in_minutes: 45
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/lora
- tests/lora
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
pytest -v -s lora/test_punica_ops_fp8.py &&
pytest -v -s lora/test_punica_xpu_ops.py'
- label: LoRA Models
timeout_in_minutes: 45
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/lora
- tests/lora
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
(pytest -v -s lora/test_mixtral.py --deselect="tests/lora/test_mixtral.py::test_mixtral_lora[4]" || true) &&
pytest -v -s lora/test_quant_model.py --deselect="tests/lora/test_quant_model.py::test_quant_model_lora[model0]" --deselect="tests/lora/test_quant_model.py::test_quant_model_lora[model1]" --deselect="tests/lora/test_quant_model.py::test_quant_model_tp_equality[model0]" &&
pytest -v -s lora/test_qwen35_densemodel_lora.py &&
pytest -v -s lora/test_transformers_model.py'
- label: LoRA Multimodal
timeout_in_minutes: 45
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/lora
- tests/lora
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'cd tests &&
pytest -v -s lora/test_default_mm_loras.py &&
pytest -v -s lora/test_qwen3_unembed.py &&
pytest -v -s lora/test_qwenvl.py &&
pytest -v -s lora/test_whisper.py'
group: Miscellaneous Intel
depends_on:
- image-build-xpu
steps:
- label: V1 Core + KV + Metrics
timeout_in_minutes: 30
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/
- tests/v1/core
- tests/v1/executor
- tests/v1/kv_offload
- tests/v1/worker
- tests/v1/kv_connector/unit
- tests/v1/metrics
- tests/entrypoints/openai/correctness/test_lmeval.py
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'pip install -r requirements/kv_connectors.txt &&
export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
cd tests &&
pytest -v -s v1/executor'
- label: V1 Sample + Logits
timeout_in_minutes: 30
device: intel_gpu
no_plugin: true
working_dir: "."
env:
REGISTRY: "public.ecr.aws/q9t5s3a7"
REPO: "vllm-ci-test-repo"
VLLM_TEST_DEVICE: "xpu"
source_file_dependencies:
- vllm/
- tests/v1/sample
- tests/v1/logits_processors
- tests/v1/test_oracle.py
- tests/v1/test_request.py
- tests/v1/test_outputs.py
commands:
- >-
bash .buildkite/scripts/hardware_ci/run-intel-test.sh
'export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
cd tests &&
pytest -v -s v1/logits_processors &&
pytest -v -s v1/test_oracle.py &&
pytest -v -s v1/test_request.py &&
pytest -v -s v1/test_outputs.py'
...@@ -240,7 +240,6 @@ fi ...@@ -240,7 +240,6 @@ fi
cleanup_docker cleanup_docker
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
# --- Build or pull test image --- # --- Build or pull test image ---
IMAGE="${IMAGE_TAG_XPU:-${image_name}}" IMAGE="${IMAGE_TAG_XPU:-${image_name}}"
...@@ -283,6 +282,7 @@ docker run \ ...@@ -283,6 +282,7 @@ docker run \
--ipc=host \ --ipc=host \
--privileged \ --privileged \
-v /dev/dri/by-path:/dev/dri/by-path \ -v /dev/dri/by-path:/dev/dri/by-path \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
--entrypoint="" \ --entrypoint="" \
-e "HF_TOKEN=${HF_TOKEN:-}" \ -e "HF_TOKEN=${HF_TOKEN:-}" \
-e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-}" \ -e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-}" \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment