[CI] Move applicable tests to CPU (#24080)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

[CI] Move applicable tests to CPU (#24080)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
bc546f76 · Reza Barazesh · GitHub · 80608ba5 · bc546f76 · bc546f76
Unverified commit bc546f76, authored Sep 30, 2025 by Reza Barazesh; committed by GitHub on Sep 30, 2025.
19 changed files
--- a/tests/tool_use/test_jamba_tool_parser.py
+++ b/tests/tool_use/test_jamba_tool_parser.py
@@ -15,6 +15,8 @@ from vllm.entrypoints.openai.tool_parsers import JambaToolParser
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
+pytestmark = pytest.mark.cpu_test
 MODEL = "ai21labs/Jamba-tiny-dev"

--- a/tests/tool_use/test_kimi_k2_tool_parser.py
+++ b/tests/tool_use/test_kimi_k2_tool_parser.py
@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
 from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser
 from vllm.transformers_utils.tokenizer import get_tokenizer
+pytestmark = pytest.mark.cpu_test
 # Use a common model that is likely to be available
 MODEL = "moonshotai/Kimi-K2-Instruct"

--- a/tests/tool_use/test_minimax_tool_parser.py
+++ b/tests/tool_use/test_minimax_tool_parser.py
@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
 from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser
 from vllm.transformers_utils.tokenizer import get_tokenizer
+pytestmark = pytest.mark.cpu_test
 # Use a common model that is likely to be available
 MODEL = "MiniMaxAi/MiniMax-M1-40k"

--- a/tests/tool_use/test_qwen3coder_tool_parser.py
+++ b/tests/tool_use/test_qwen3coder_tool_parser.py
@@ -18,6 +18,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
+pytestmark = pytest.mark.cpu_test
 MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"

--- a/tests/tool_use/test_seed_oss_tool_parser.py
+++ b/tests/tool_use/test_seed_oss_tool_parser.py
@@ -16,6 +16,8 @@ from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
+pytestmark = pytest.mark.cpu_test
 # Use a common model that is likely to be available
 MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"

--- a/tests/tool_use/test_tool_choice_required.py
+++ b/tests/tool_use/test_tool_choice_required.py
@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                              ChatCompletionToolsParam)
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
+pytestmark = pytest.mark.cpu_test
 EXAMPLE_TOOLS = [
    {
        "type": "function",

--- a/tests/tool_use/test_xlam_tool_parser.py
+++ b/tests/tool_use/test_xlam_tool_parser.py
@@ -14,6 +14,8 @@ from vllm.entrypoints.openai.tool_parsers import xLAMToolParser
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
+pytestmark = pytest.mark.cpu_test
 # Use a common model that is likely to be available
 MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"

--- a/tests/v1/core/test_async_scheduler.py
+++ b/tests/v1/core/test_async_scheduler.py
@@ -11,6 +11,8 @@ from vllm.v1.utils import ConstantList
 from .utils import create_requests, create_scheduler
+pytestmark = pytest.mark.cpu_test
 def _make_model_runner_output(
    scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput:

--- a/tests/v1/core/test_encoder_cache_manager.py
+++ b/tests/v1/core/test_encoder_cache_manager.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
 from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange
 from vllm.v1.core.encoder_cache_manager import EncoderCacheManager
+pytestmark = pytest.mark.cpu_test
 # ------------------ Mock Classes ------------------ #
 class MockRequest:

--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@@ -32,6 +32,8 @@ from vllm.v1.request import Request
 # yapf: enable
+pytestmark = pytest.mark.cpu_test
 @pytest.fixture(autouse=True)
 def _auto_init_hash_fn(request):

--- a/tests/v1/core/test_prefix_caching.py
+++ b/tests/v1/core/test_prefix_caching.py
@@ -25,6 +25,8 @@ from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId,
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                        KVCacheGroupSpec, SlidingWindowSpec)
+pytestmark = pytest.mark.cpu_test
 @pytest.fixture(autouse=True)
 def _auto_init_hash_fn(request):
@@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int):
 def test_eagle_enabled_removes_last_block():
-    """Verify Eagle does NOT remove blocks when request 
+    """Verify Eagle does NOT remove blocks when request
    length is divisible by block size."""
    block_size = 16
    manager = KVCacheManager(

--- a/tests/v1/core/test_scheduler.py
+++ b/tests/v1/core/test_scheduler.py
@@ -23,6 +23,8 @@ from vllm.v1.structured_output.request import StructuredOutputRequest
 from .utils import EOS_TOKEN_ID, create_requests, create_scheduler
+pytestmark = pytest.mark.cpu_test
 def test_add_requests():
    scheduler = create_scheduler()

--- a/tests/v1/core/test_single_type_kv_cache_manager.py
+++ b/tests/v1/core/test_single_type_kv_cache_manager.py
@@ -3,6 +3,7 @@
 import random
+import pytest
 import torch
 from vllm.v1.core.block_pool import BlockPool
@@ -13,6 +14,8 @@ from vllm.v1.core.single_type_kv_cache_manager import (
 from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec,
                                        SlidingWindowSpec)
+pytestmark = pytest.mark.cpu_test
 def get_sliding_window_manager(sliding_window_spec, block_pool):
    return SlidingWindowManager(sliding_window_spec,

--- a/tests/v1/kv_connector/unit/test_output_aggreagator.py
+++ b/tests/v1/kv_connector/unit/test_output_aggreagator.py
@@ -3,9 +3,13 @@
 from concurrent.futures import Future
 from typing import Optional
+import pytest
 from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
 from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
+pytestmark = pytest.mark.cpu_test
 class DummyModelRunnerOutput(ModelRunnerOutput):

--- a/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py
+++ b/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py
@@ -2,12 +2,16 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import copy
+import pytest
 from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
 from vllm.v1.request import FinishReason, RequestStatus
 from .utils import (assert_scheduler_empty, create_model_runner_output,
                    create_request, create_scheduler, create_vllm_config)
+pytestmark = pytest.mark.cpu_test
 def test_basic_lifecycle():
    """Test lifecycle of a Remote Decode request."""

--- a/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
+++ b/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
@@ -2,12 +2,16 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import copy
+import pytest
 from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
 from vllm.v1.request import FinishReason, RequestStatus
 from .utils import (assert_scheduler_empty, create_model_runner_output,
                    create_request, create_scheduler, create_vllm_config)
+pytestmark = pytest.mark.cpu_test
 def test_basic_lifecycle():
    """Test lifecycle of a remote prefill."""

--- a/tests/v1/metrics/test_metrics_reader.py
+++ b/tests/v1/metrics/test_metrics_reader.py
@@ -7,6 +7,8 @@ import pytest
 from vllm.v1.metrics.reader import (Counter, Gauge, Histogram, Vector,
                                    get_metrics_snapshot)
+pytestmark = pytest.mark.cpu_test
 @pytest.fixture(autouse=True)
 def test_registry(monkeypatch):

--- a/tests/v1/structured_output/test_utils.py
+++ b/tests/v1/structured_output/test_utils.py
@@ -6,6 +6,8 @@ import pytest
 from vllm.v1.structured_output.backend_xgrammar import (
    has_xgrammar_unsupported_json_features)
+pytestmark = pytest.mark.cpu_test
 @pytest.fixture
 def unsupported_string_schemas():

--- a/tests/v1/test_serial_utils.py
+++ b/tests/v1/test_serial_utils.py
@@ -16,6 +16,8 @@ from vllm.multimodal.inputs import (MultiModalBatchedField,
                                    MultiModalSharedField, NestedTensors)
 from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
+pytestmark = pytest.mark.cpu_test
 class UnrecognizedType(UserDict):