"vscode:/vscode.git/clone" did not exist on "8a4358ecb5ba457fad2be0ed930132489eddddf5"
Unverified commit bc546f76, authored by Reza Barazesh, committed by GitHub
Browse files

[CI] Move applicable tests to CPU (#24080)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 80608ba5
...@@ -15,6 +15,8 @@ from vllm.entrypoints.openai.tool_parsers import JambaToolParser ...@@ -15,6 +15,8 @@ from vllm.entrypoints.openai.tool_parsers import JambaToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
MODEL = "ai21labs/Jamba-tiny-dev" MODEL = "ai21labs/Jamba-tiny-dev"
......
...@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall ...@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available # Use a common model that is likely to be available
MODEL = "moonshotai/Kimi-K2-Instruct" MODEL = "moonshotai/Kimi-K2-Instruct"
......
...@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam, ...@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available # Use a common model that is likely to be available
MODEL = "MiniMaxAi/MiniMax-M1-40k" MODEL = "MiniMaxAi/MiniMax-M1-40k"
......
...@@ -18,6 +18,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import ( ...@@ -18,6 +18,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"
......
...@@ -16,6 +16,8 @@ from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser ...@@ -16,6 +16,8 @@ from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available # Use a common model that is likely to be available
MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct" MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"
......
...@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, ...@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
ChatCompletionToolsParam) ChatCompletionToolsParam)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
pytestmark = pytest.mark.cpu_test
EXAMPLE_TOOLS = [ EXAMPLE_TOOLS = [
{ {
"type": "function", "type": "function",
......
...@@ -14,6 +14,8 @@ from vllm.entrypoints.openai.tool_parsers import xLAMToolParser ...@@ -14,6 +14,8 @@ from vllm.entrypoints.openai.tool_parsers import xLAMToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available # Use a common model that is likely to be available
MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r" MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"
......
...@@ -11,6 +11,8 @@ from vllm.v1.utils import ConstantList ...@@ -11,6 +11,8 @@ from vllm.v1.utils import ConstantList
from .utils import create_requests, create_scheduler from .utils import create_requests, create_scheduler
pytestmark = pytest.mark.cpu_test
def _make_model_runner_output( def _make_model_runner_output(
scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput: scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput:
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange
from vllm.v1.core.encoder_cache_manager import EncoderCacheManager from vllm.v1.core.encoder_cache_manager import EncoderCacheManager
pytestmark = pytest.mark.cpu_test
# ------------------ Mock Classes ------------------ # # ------------------ Mock Classes ------------------ #
class MockRequest: class MockRequest:
......
...@@ -32,6 +32,8 @@ from vllm.v1.request import Request ...@@ -32,6 +32,8 @@ from vllm.v1.request import Request
# yapf: enable # yapf: enable
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def _auto_init_hash_fn(request): def _auto_init_hash_fn(request):
......
...@@ -25,6 +25,8 @@ from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId, ...@@ -25,6 +25,8 @@ from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId,
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig, from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
KVCacheGroupSpec, SlidingWindowSpec) KVCacheGroupSpec, SlidingWindowSpec)
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def _auto_init_hash_fn(request): def _auto_init_hash_fn(request):
...@@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int): ...@@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int):
def test_eagle_enabled_removes_last_block(): def test_eagle_enabled_removes_last_block():
"""Verify Eagle does NOT remove blocks when request """Verify Eagle does NOT remove blocks when request
length is divisible by block size.""" length is divisible by block size."""
block_size = 16 block_size = 16
manager = KVCacheManager( manager = KVCacheManager(
......
...@@ -23,6 +23,8 @@ from vllm.v1.structured_output.request import StructuredOutputRequest ...@@ -23,6 +23,8 @@ from vllm.v1.structured_output.request import StructuredOutputRequest
from .utils import EOS_TOKEN_ID, create_requests, create_scheduler from .utils import EOS_TOKEN_ID, create_requests, create_scheduler
pytestmark = pytest.mark.cpu_test
def test_add_requests(): def test_add_requests():
scheduler = create_scheduler() scheduler = create_scheduler()
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import random import random
import pytest
import torch import torch
from vllm.v1.core.block_pool import BlockPool from vllm.v1.core.block_pool import BlockPool
...@@ -13,6 +14,8 @@ from vllm.v1.core.single_type_kv_cache_manager import ( ...@@ -13,6 +14,8 @@ from vllm.v1.core.single_type_kv_cache_manager import (
from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec, from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec,
SlidingWindowSpec) SlidingWindowSpec)
pytestmark = pytest.mark.cpu_test
def get_sliding_window_manager(sliding_window_spec, block_pool): def get_sliding_window_manager(sliding_window_spec, block_pool):
return SlidingWindowManager(sliding_window_spec, return SlidingWindowManager(sliding_window_spec,
......
...@@ -3,9 +3,13 @@ ...@@ -3,9 +3,13 @@
from concurrent.futures import Future from concurrent.futures import Future
from typing import Optional from typing import Optional
import pytest
from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
pytestmark = pytest.mark.cpu_test
class DummyModelRunnerOutput(ModelRunnerOutput): class DummyModelRunnerOutput(ModelRunnerOutput):
......
...@@ -2,12 +2,16 @@ ...@@ -2,12 +2,16 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy import copy
import pytest
from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
from vllm.v1.request import FinishReason, RequestStatus from vllm.v1.request import FinishReason, RequestStatus
from .utils import (assert_scheduler_empty, create_model_runner_output, from .utils import (assert_scheduler_empty, create_model_runner_output,
create_request, create_scheduler, create_vllm_config) create_request, create_scheduler, create_vllm_config)
pytestmark = pytest.mark.cpu_test
def test_basic_lifecycle(): def test_basic_lifecycle():
"""Test lifecycle of a Remote Decode request.""" """Test lifecycle of a Remote Decode request."""
......
...@@ -2,12 +2,16 @@ ...@@ -2,12 +2,16 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy import copy
import pytest
from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
from vllm.v1.request import FinishReason, RequestStatus from vllm.v1.request import FinishReason, RequestStatus
from .utils import (assert_scheduler_empty, create_model_runner_output, from .utils import (assert_scheduler_empty, create_model_runner_output,
create_request, create_scheduler, create_vllm_config) create_request, create_scheduler, create_vllm_config)
pytestmark = pytest.mark.cpu_test
def test_basic_lifecycle(): def test_basic_lifecycle():
"""Test lifecycle of a remote prefill.""" """Test lifecycle of a remote prefill."""
......
...@@ -7,6 +7,8 @@ import pytest ...@@ -7,6 +7,8 @@ import pytest
from vllm.v1.metrics.reader import (Counter, Gauge, Histogram, Vector, from vllm.v1.metrics.reader import (Counter, Gauge, Histogram, Vector,
get_metrics_snapshot) get_metrics_snapshot)
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def test_registry(monkeypatch): def test_registry(monkeypatch):
......
...@@ -6,6 +6,8 @@ import pytest ...@@ -6,6 +6,8 @@ import pytest
from vllm.v1.structured_output.backend_xgrammar import ( from vllm.v1.structured_output.backend_xgrammar import (
has_xgrammar_unsupported_json_features) has_xgrammar_unsupported_json_features)
pytestmark = pytest.mark.cpu_test
@pytest.fixture @pytest.fixture
def unsupported_string_schemas(): def unsupported_string_schemas():
......
...@@ -16,6 +16,8 @@ from vllm.multimodal.inputs import (MultiModalBatchedField, ...@@ -16,6 +16,8 @@ from vllm.multimodal.inputs import (MultiModalBatchedField,
MultiModalSharedField, NestedTensors) MultiModalSharedField, NestedTensors)
from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
pytestmark = pytest.mark.cpu_test
class UnrecognizedType(UserDict): class UnrecognizedType(UserDict):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment