[Refactor] Relocate chat completion and anthropic tests (#36919)

Signed-off-by: sfeng33 <4florafeng@gmail.com>

[Refactor] Relocate chat completion and anthropic tests (#36919)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
bcfdadb1 · Flora Feng · GitHub · 236de72e · bcfdadb1 · bcfdadb1
Unverified commit bcfdadb1, authored Mar 14, 2026 by Flora Feng; committed by GitHub on Mar 14, 2026
14 changed files
--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -34,7 +34,7 @@ steps:
  - tests/entrypoints/test_chat_utils
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
  - pytest -v -s entrypoints/test_chat_utils.py
  mirror:
    amd:

--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -334,7 +334,7 @@ pull_request_rules:
    - or:
      - files~=^tests/tool_use/
      - files~=^tests/entrypoints/openai/tool_parsers/
-      - files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py
+      - files=tests/entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py
      - files~=^vllm/entrypoints/openai/tool_parsers/
      - files=docs/features/tool_calling.md
      - files~=^examples/tool_chat_*

--- a/tests/entrypoints/anthropic/__init__.py
+++ b/tests/entrypoints/anthropic/__init__.py
--- a/tests/entrypoints/openai/test_anthropic_messages_conversion.py
+++ b/tests/entrypoints/openai/test_anthropic_messages_conversion.py
--- a/tests/entrypoints/openai/chat_completion/__init__.py
+++ b/tests/entrypoints/openai/chat_completion/__init__.py
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -14,13 +14,12 @@ import requests
 import torch
 from openai import BadRequestError
+from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.chat_completion.protocol import (
    ChatCompletionRequest,
 )
 from vllm.sampling_params import SamplingParams
-from ...utils import RemoteOpenAIServer
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

--- a/tests/entrypoints/openai/test_chat_echo.py
+++ b/tests/entrypoints/openai/test_chat_echo.py
@@ -7,10 +7,9 @@ import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
+from tests.utils import RemoteOpenAIServer
 from vllm.config import ModelConfig
-from ...utils import RemoteOpenAIServer
 # # any model with a chat template should work here
 MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"

--- a/tests/entrypoints/openai/test_chat_error.py
+++ b/tests/entrypoints/openai/test_chat_error.py
--- a/tests/entrypoints/openai/test_chat_logit_bias_validation.py
+++ b/tests/entrypoints/openai/test_chat_logit_bias_validation.py
@@ -5,10 +5,9 @@ import openai
 import pytest
 import pytest_asyncio
+from tests.utils import RemoteOpenAIServer
 from vllm.config import ModelConfig
-from ...utils import RemoteOpenAIServer
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"

--- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
+++ b/tests/entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py
@@ -5,7 +5,7 @@ import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-from ...utils import RemoteOpenAIServer
+from tests.utils import RemoteOpenAIServer
 # a reasoning and tool calling model
 MODEL_NAME = "Qwen/QwQ-32B"

--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -10,7 +10,7 @@ import pytest
 import pytest_asyncio
 # downloading lora to test lora requests
-from ...utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
+from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
 # any model with a chat template should work here
 MODEL_NAME = "Qwen/Qwen3-0.6B"

--- a/tests/entrypoints/openai/test_enable_force_include_usage.py
+++ b/tests/entrypoints/openai/test_enable_force_include_usage.py
@@ -4,7 +4,7 @@ import openai
 import pytest
 import pytest_asyncio
-from ...utils import RemoteOpenAIServer
+from tests.utils import RemoteOpenAIServer
 @pytest.fixture(scope="module")

--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -10,6 +10,12 @@ import pytest
 import pytest_asyncio
 from openai import OpenAI
+from tests.entrypoints.openai.utils import (
+    accumulate_streaming_response,
+    verify_chat_response,
+    verify_harmony_messages,
+)
+from tests.utils import RemoteOpenAIServer
 from vllm._aiter_ops import is_aiter_found_and_supported
 from vllm.config import MultiModalConfig
 from vllm.entrypoints.openai.chat_completion.protocol import (
@@ -39,13 +45,6 @@ from vllm.tokenizers.registry import tokenizer_args_from_config
 from vllm.tool_parsers import ToolParserManager
 from vllm.v1.engine.async_llm import AsyncLLM
-from ...utils import RemoteOpenAIServer
-from .utils import (
-    accumulate_streaming_response,
-    verify_chat_response,
-    verify_harmony_messages,
-)
 GPT_OSS_MODEL_NAME = "openai/gpt-oss-20b"
 GPT_OSS_SPECULATOR_NAME = "RedHatAI/gpt-oss-20b-speculator.eagle3"

--- a/tests/entrypoints/openai/test_serving_chat_stream_harmony.py
+++ b/tests/entrypoints/openai/test_serving_chat_stream_harmony.py