Unverified commit 0291fbf6, authored by Zhewen Li, committed by GitHub
Browse files

[CI/Build] Fix amd model executor test (#27612)


Signed-off-by: zhewenli <zhewenli@meta.com>
parent b46e4a06
...@@ -561,7 +561,7 @@ steps: ...@@ -561,7 +561,7 @@ steps:
- label: Model Executor Test # 23min - label: Model Executor Test # 23min
timeout_in_minutes: 35 timeout_in_minutes: 35
mirror_hardwares: [amdexperimental] mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1 agent_pool: mi325_1
# grade: Blocking # grade: Blocking
source_file_dependencies: source_file_dependencies:
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm import SamplingParams from vllm import SamplingParams
from vllm.platforms import current_platform
test_model = "openai-community/gpt2" test_model = "openai-community/gpt2"
...@@ -15,6 +18,9 @@ prompts = [ ...@@ -15,6 +18,9 @@ prompts = [
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, seed=0) sampling_params = SamplingParams(temperature=0.8, top_p=0.95, seed=0)
@pytest.mark.skipif(
not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
)
def test_model_loader_download_files(vllm_runner): def test_model_loader_download_files(vllm_runner):
with vllm_runner(test_model, load_format="fastsafetensors") as llm: with vllm_runner(test_model, load_format="fastsafetensors") as llm:
deserialized_outputs = llm.generate(prompts, sampling_params) deserialized_outputs = llm.generate(prompts, sampling_params)
......
...@@ -5,6 +5,7 @@ import glob ...@@ -5,6 +5,7 @@ import glob
import tempfile import tempfile
import huggingface_hub.constants import huggingface_hub.constants
import pytest
import torch import torch
from vllm.model_executor.model_loader.weight_utils import ( from vllm.model_executor.model_loader.weight_utils import (
...@@ -12,8 +13,12 @@ from vllm.model_executor.model_loader.weight_utils import ( ...@@ -12,8 +13,12 @@ from vllm.model_executor.model_loader.weight_utils import (
fastsafetensors_weights_iterator, fastsafetensors_weights_iterator,
safetensors_weights_iterator, safetensors_weights_iterator,
) )
from vllm.platforms import current_platform
@pytest.mark.skipif(
not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
)
def test_fastsafetensors_model_loader(): def test_fastsafetensors_model_loader():
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
huggingface_hub.constants.HF_HUB_OFFLINE = False huggingface_hub.constants.HF_HUB_OFFLINE = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment