[CI/Build] Fix amd model executor test (#27612)

Signed-off-by: zhewenli <zhewenli@meta.com>

[CI/Build] Fix amd model executor test (#27612)
Signed-off-by: zhewenli <zhewenli@meta.com>
0291fbf6 · Zhewen Li · GitHub · b46e4a06 · 0291fbf6 · 0291fbf6
Unverified commit 0291fbf6, authored Oct 28, 2025 by Zhewen Li; committed by GitHub on Oct 28, 2025
3 changed files
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -561,7 +561,7 @@ steps:

 - label: Model Executor Test # 23min
  timeout_in_minutes: 35
-  mirror_hardwares: [amdexperimental]
+  mirror_hardwares: [amdexperimental, amdproduction]
  agent_pool: mi325_1
  # grade: Blocking
  source_file_dependencies:

--- a/tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py
+++ b/tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import pytest
+
 from vllm import SamplingParams
+from vllm.platforms import current_platform

 test_model = "openai-community/gpt2"

@@ -15,6 +18,9 @@ prompts = [
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95, seed=0)


+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
+)
 def test_model_loader_download_files(vllm_runner):
    with vllm_runner(test_model, load_format="fastsafetensors") as llm:
        deserialized_outputs = llm.generate(prompts, sampling_params)

--- a/tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py
+++ b/tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py
@@ -5,6 +5,7 @@ import glob
 import tempfile

 import huggingface_hub.constants
+import pytest
 import torch

 from vllm.model_executor.model_loader.weight_utils import (
@@ -12,8 +13,12 @@ from vllm.model_executor.model_loader.weight_utils import (
    fastsafetensors_weights_iterator,
    safetensors_weights_iterator,
 )
+from vllm.platforms import current_platform


+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
+)
 def test_fastsafetensors_model_loader():
    with tempfile.TemporaryDirectory() as tmpdir:
        huggingface_hub.constants.HF_HUB_OFFLINE = False