[CI/Build] Separate out flaky responses API tests (#32110)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[CI/Build] Separate out flaky responses API tests (#32110)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
a3745321 · Cyrus Leung · GitHub · cee7436a · a3745321 · a3745321
Unverified commit a3745321, authored by Cyrus Leung on Jan 11, 2026; committed by GitHub on Jan 11, 2026.
10 changed files
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -162,8 +162,7 @@ steps:
  - tests/entrypoints/test_chat_utils
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/test_vision_embeds.py
-  - pytest -v -s entrypoints/openai/test_vision_embeds.py
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
  - pytest -v -s entrypoints/test_chat_utils.py

 - label: Entrypoints Integration Test (API Server 2)
@@ -200,6 +199,21 @@ steps:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -v -s entrypoints/pooling

+- label: Entrypoints Integration Test (Responses API)
+  timeout_in_minutes: 50
+  mirror_hardwares: [amdexperimental]
+  agent_pool: mi325_1
+  # grade: Blocking
+  working_dir: "/vllm-workspace/tests"
+  fast_check: true
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/openai/responses
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/openai/responses
+
 - label: Distributed Tests (4 GPUs) # 35min
  timeout_in_minutes: 50
  mirror_hardwares: [amdexperimental]

--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -144,7 +144,7 @@ steps:
  - tests/entrypoints/test_chat_utils
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
  - pytest -v -s entrypoints/test_chat_utils.py

 - label: Entrypoints Integration Test (API Server 2)
@@ -177,6 +177,18 @@ steps:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -v -s entrypoints/pooling

+- label: Entrypoints Integration Test (Responses API)
+  timeout_in_minutes: 50
+  mirror_hardwares: [amdexperimental]
+  working_dir: "/vllm-workspace/tests"
+  fast_check: true
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/openai/responses
+  commands:
+  - pytest -v -s entrypoints/openai/responses
+
 - label: Distributed Tests (4 GPUs) # 35min
  timeout_in_minutes: 50
  mirror_hardwares: [amdexperimental]

--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -34,10 +34,9 @@ steps:
  - tests/entrypoints/test_chat_utils
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
  - pytest -v -s entrypoints/test_chat_utils.py

-
 - label: Entrypoints Integration (API Server 2)
  timeout_in_minutes: 130
  working_dir: "/vllm-workspace/tests"
@@ -64,6 +63,14 @@ steps:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -v -s entrypoints/pooling

+- label: Entrypoints Integration (Responses API)
+  timeout_in_minutes: 50
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/openai/responses
+  commands:
+  - pytest -v -s entrypoints/openai/responses

 - label: Entrypoints V1
  timeout_in_minutes: 50

--- a/tests/entrypoints/openai/responses/__init__.py
+++ b/tests/entrypoints/openai/responses/__init__.py
--- a/tests/entrypoints/openai/test_responses_error.py
+++ b/tests/entrypoints/openai/test_responses_error.py
--- a/tests/entrypoints/openai/test_responses_function_call_parsing.py
+++ b/tests/entrypoints/openai/test_responses_function_call_parsing.py
--- a/tests/entrypoints/openai/test_response_api_with_harmony.py
+++ b/tests/entrypoints/openai/test_response_api_with_harmony.py
@@ -12,7 +12,7 @@ from openai_harmony import (
    Message,
 )

-from ...utils import RemoteOpenAIServer
+from ....utils import RemoteOpenAIServer

 MODEL_NAME = "openai/gpt-oss-20b"


--- a/tests/entrypoints/openai/test_response_api_mcp_tools.py
+++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py
@@ -9,7 +9,7 @@ from openai_harmony import ToolDescription, ToolNamespaceConfig

 from vllm.entrypoints.tool_server import MCPToolServer

-from ...utils import RemoteOpenAIServer
+from ....utils import RemoteOpenAIServer

 MODEL_NAME = "openai/gpt-oss-20b"


--- a/tests/entrypoints/openai/test_response_api_parsable_context.py
+++ b/tests/entrypoints/openai/test_response_api_parsable_context.py
@@ -8,7 +8,7 @@ import pytest
 import pytest_asyncio
 from openai import OpenAI

-from ...utils import RemoteOpenAIServer
+from ....utils import RemoteOpenAIServer

 MODEL_NAME = "Qwen/Qwen3-8B"


--- a/tests/entrypoints/openai/test_response_api_simple.py
+++ b/tests/entrypoints/openai/test_response_api_simple.py
@@ -6,7 +6,7 @@ import pytest
 import pytest_asyncio
 from openai import OpenAI

-from ...utils import RemoteOpenAIServer
+from ....utils import RemoteOpenAIServer

 MODEL_NAME = "Qwen/Qwen3-8B"