[XPU][CI] Add misc, engine and lora cases on Intel GPU in CI (#39887)

Signed-off-by: zengxian <xiangdong.zeng@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>

[XPU][CI] Add misc, engine and lora cases on Intel GPU in CI (#39887)
Signed-off-by: zengxian <xiangdong.zeng@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
b2a55186 · xiangdong · GitHub · 908a7134 · b2a55186 · b2a55186
Unverified Commit b2a55186 authored Apr 21, 2026 by xiangdong Committed by GitHub Apr 21, 2026
5 changed files
--- a/.buildkite/intel_jobs/engine_intel.yaml
+++ b/.buildkite/intel_jobs/engine_intel.yaml
+# Intel GPU (XPU) CI group for the V1 engine tests.
+# The whole group waits for the image-build-xpu step.
+group: Engine Intel
+depends_on:
+  - image-build-xpu
+steps:
+# Runs tests/v1/engine, skipping test_preprocess_error_handling.py.
+- label: Engine (1 GPU)
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: '.'
+  env:
+    REGISTRY: 'public.ecr.aws/q9t5s3a7'
+    REPO: 'vllm-ci-test-repo'
+    VLLM_TEST_DEVICE: 'xpu'
+  source_file_dependencies:
+    - vllm/v1/engine/
+    - tests/v1/engine/
+  commands:
+    # The folded scalar (>-) joins the lines below with single spaces, so the
+    # wrapper script receives the quoted shell snippet as one argument.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests && pytest -v -s v1/engine
+      --ignore v1/engine/test_preprocess_error_handling.py'
--- a/.buildkite/intel_jobs/kernels_intel.yaml
+++ b/.buildkite/intel_jobs/kernels_intel.yaml
+# Intel GPU (XPU) CI group for the vLLM IR kernel tests.
+# The whole group waits for the image-build-xpu step.
+group: Kernels Intel
+depends_on:
+  - image-build-xpu
+steps:
+# Runs tests/kernels/ir through the Intel test wrapper script.
+- label: vLLM IR Tests
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/ir
+    - vllm/kernels
+  commands:
+    # The folded scalar (>-) joins the lines below with single spaces, so the
+    # wrapper script receives the quoted shell snippet as one argument.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pytest -v -s kernels/ir'
--- a/.buildkite/intel_jobs/lora_intel.yaml
+++ b/.buildkite/intel_jobs/lora_intel.yaml
+# Intel GPU (XPU) CI group for the LoRA tests.
+# The whole group waits for the image-build-xpu step.
+group: LoRA Intel
+depends_on:
+  - image-build-xpu
+steps:
+# LoRA layer, checkpoint, manager, resolver and utility tests.
+- label: LoRA Runtime + Utils
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    # test_add_lora.py and test_worker.py are non-blocking: a failure there
+    # does not fail the step. The fallback prints a WARNING line instead of
+    # silently discarding the failure, so it stays visible in the job log.
+    # The folded scalar (>-) joins the lines below with single spaces.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pytest -v -s lora/test_layers.py &&
+      pytest -v -s lora/test_lora_checkpoints.py &&
+      pytest -v -s lora/test_lora_functions.py &&
+      pytest -v -s lora/test_lora_huggingface.py &&
+      pytest -v -s lora/test_lora_manager.py &&
+      pytest -v -s lora/test_lora_utils.py &&
+      pytest -v -s lora/test_peft_helper.py &&
+      pytest -v -s lora/test_resolver.py &&
+      pytest -v -s lora/test_utils.py &&
+      (pytest -v -s lora/test_add_lora.py
+      --deselect="tests/lora/test_add_lora.py::test_add_lora"
+      || echo "WARNING lora/test_add_lora.py failed on XPU and is treated as non-blocking") &&
+      (pytest -v -s lora/test_worker.py
+      --deselect="tests/lora/test_worker.py::test_worker_apply_lora"
+      || echo "WARNING lora/test_worker.py failed on XPU and is treated as non-blocking")'
+# Fused MoE LoRA kernel and MoE LoRA align-sum tests.
+- label: LoRA Fused/MoE Kernels
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    # The folded scalar (>-) joins the lines below with single spaces.
+    # Keep lines free of trailing whitespace: inside a block scalar it would
+    # become part of the command string.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pytest -v -s lora/test_fused_moe_lora_kernel.py &&
+      pytest -v -s lora/test_moe_lora_align_sum.py'
+# Punica LoRA kernel tests, with a fixed list of parametrized cases
+# deselected on XPU.
+- label: LoRA Punica Kernels
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: '.'
+  env:
+    REGISTRY: 'public.ecr.aws/q9t5s3a7'
+    REPO: 'vllm-ci-test-repo'
+    VLLM_TEST_DEVICE: 'xpu'
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    # One --deselect per line for reviewability; the folded scalar (>-) joins
+    # the lines with single spaces, so pytest still sees one command line.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests && set -o pipefail &&
+      pytest -v -s lora/test_punica_ops.py
+      --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-2-2049-64-32-32]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype1-2-64000-32-4-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-128-1-32]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-256-1-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-256-8-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels[expand-0-xpu:0-dtype0-3-2049-128-8-16]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-128-8-32]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels[expand-0-xpu:0-dtype1-1-2049-256-128-32]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype0-3-64256-32-4-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype1-2-29696-32-4-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype1-3-49408-32-4-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype0-2-16384-32-4-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype0-2-51328-32-4-4]"
+      --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype1-1-102656-32-4-4]"'
+# Punica FP8 ops and XPU-specific Punica ops tests.
+- label: LoRA Punica FP8/XPU Ops
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: '.'
+  env:
+    REGISTRY: 'public.ecr.aws/q9t5s3a7'
+    REPO: 'vllm-ci-test-repo'
+    VLLM_TEST_DEVICE: 'xpu'
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    # The folded scalar (>-) joins the lines below with single spaces, so the
+    # wrapper script receives the quoted shell snippet as one argument.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests
+      && pytest -v -s lora/test_punica_ops_fp8.py
+      && pytest -v -s lora/test_punica_xpu_ops.py'
+# Model-level LoRA tests (Mixtral, quantized models, Qwen3.5 dense,
+# Transformers backend).
+- label: LoRA Models
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    # test_mixtral.py is non-blocking: a failure there does not fail the
+    # step. The fallback prints a WARNING line instead of silently
+    # discarding the failure, so it stays visible in the job log.
+    # The folded scalar (>-) joins the lines below with single spaces.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      (pytest -v -s lora/test_mixtral.py
+      --deselect="tests/lora/test_mixtral.py::test_mixtral_lora[4]"
+      || echo "WARNING lora/test_mixtral.py failed on XPU and is treated as non-blocking") &&
+      pytest -v -s lora/test_quant_model.py
+      --deselect="tests/lora/test_quant_model.py::test_quant_model_lora[model0]"
+      --deselect="tests/lora/test_quant_model.py::test_quant_model_lora[model1]"
+      --deselect="tests/lora/test_quant_model.py::test_quant_model_tp_equality[model0]" &&
+      pytest -v -s lora/test_qwen35_densemodel_lora.py &&
+      pytest -v -s lora/test_transformers_model.py'
+# LoRA tests for default multimodal LoRAs, Qwen3 unembed, Qwen-VL and
+# Whisper.
+- label: LoRA Multimodal
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    # The folded scalar (>-) joins the lines below with single spaces.
+    # Keep lines free of trailing whitespace: inside a block scalar it would
+    # become part of the command string.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pytest -v -s lora/test_default_mm_loras.py &&
+      pytest -v -s lora/test_qwen3_unembed.py &&
+      pytest -v -s lora/test_qwenvl.py &&
+      pytest -v -s lora/test_whisper.py'
--- a/.buildkite/intel_jobs/misc_intel.yaml
+++ b/.buildkite/intel_jobs/misc_intel.yaml
+# Intel GPU (XPU) CI group for miscellaneous V1 tests.
+# The whole group waits for the image-build-xpu step.
+group: Miscellaneous Intel
+depends_on:
+  - image-build-xpu
+steps:
+# NOTE(review): the command below only runs tests/v1/executor, although the
+# label and source_file_dependencies also name core, KV and metrics paths.
+# Confirm whether the remaining suites are meant to be added later.
+- label: V1 Core + KV + Metrics
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: '.'
+  env:
+    REGISTRY: 'public.ecr.aws/q9t5s3a7'
+    REPO: 'vllm-ci-test-repo'
+    VLLM_TEST_DEVICE: 'xpu'
+  source_file_dependencies:
+    - vllm/
+    - tests/v1/core
+    - tests/v1/executor
+    - tests/v1/kv_offload
+    - tests/v1/worker
+    - tests/v1/kv_connector/unit
+    - tests/v1/metrics
+    - tests/entrypoints/openai/correctness/test_lmeval.py
+  commands:
+    # Installs the KV-connector requirements, sets the worker multiprocessing
+    # method to spawn, then runs pytest from the tests directory.
+    # The folded scalar (>-) joins the lines below with single spaces.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'pip install -r requirements/kv_connectors.txt
+      && export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      && cd tests
+      && pytest -v -s v1/executor'
+# NOTE(review): tests/v1/sample is listed as a dependency but no pytest
+# invocation below targets it. Confirm whether that is intentional.
+- label: V1 Sample + Logits
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: '.'
+  env:
+    REGISTRY: 'public.ecr.aws/q9t5s3a7'
+    REPO: 'vllm-ci-test-repo'
+    VLLM_TEST_DEVICE: 'xpu'
+  source_file_dependencies:
+    - vllm/
+    - tests/v1/sample
+    - tests/v1/logits_processors
+    - tests/v1/test_oracle.py
+    - tests/v1/test_request.py
+    - tests/v1/test_outputs.py
+  commands:
+    # Sets the worker multiprocessing method to spawn, then runs the
+    # logits-processor, oracle, request and outputs tests in sequence.
+    # The folded scalar (>-) joins the lines below with single spaces.
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      && cd tests
+      && pytest -v -s v1/logits_processors
+      && pytest -v -s v1/test_oracle.py
+      && pytest -v -s v1/test_request.py
+      && pytest -v -s v1/test_outputs.py'
--- a/.buildkite/scripts/hardware_ci/run-intel-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-intel-test.sh
@@ -240,7 +240,6 @@ fi
 cleanup_docker
 aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
-aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
 # --- Build or pull test image ---
 IMAGE="${IMAGE_TAG_XPU:-${image_name}}"
@@ -283,6 +282,7 @@ docker run \
    --ipc=host \
    --privileged \
    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
    --entrypoint="" \
    -e "HF_TOKEN=${HF_TOKEN:-}" \
    -e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-}" \