[Frontend][CI] Consolidate instrumentator entrypoints (#34123)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>

[Frontend][CI] Consolidate instrumentator entrypoints (#34123)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
dab1de9f · wang.yuqi · GitHub · 8d48d0a9 · dab1de9f · dab1de9f
Unverified commit dab1de9f, authored Feb 10, 2026 by wang.yuqi; committed by GitHub on Feb 10, 2026
16 changed files
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -132,7 +132,7 @@ steps:
  - tests/entrypoints/
  commands:
  - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
 - label: Entrypoints Integration Test (LLM) # 30min
  timeout_in_minutes: 40
@@ -179,14 +179,14 @@ steps:
  torch_nightly: true
  source_file_dependencies:
  - vllm/
-  - tests/entrypoints/sleep
  - tests/entrypoints/rpc
+  - tests/entrypoints/instrumentator
  - tests/tool_use
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/sleep
+  - pytest -v -s entrypoints/instrumentator
-  - pytest -v -s tool_use
  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
+  - pytest -v -s tool_use
 - label: Entrypoints Integration Test (Pooling)
  timeout_in_minutes: 50

--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -118,7 +118,7 @@ steps:
  - tests/entrypoints/
  commands:
  - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
 - label: Entrypoints Integration Test (LLM) # 30min
  timeout_in_minutes: 40
@@ -148,7 +148,7 @@ steps:
  - tests/entrypoints/test_chat_utils
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/instrumentator --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
  - pytest -v -s entrypoints/test_chat_utils.py
 - label: Entrypoints Integration Test (API Server 2)
@@ -159,12 +159,12 @@ steps:
  torch_nightly: true
  source_file_dependencies:
  - vllm/
-  - tests/entrypoints/sleep
  - tests/entrypoints/rpc
+  - tests/entrypoints/instrumentator
  - tests/tool_use
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/sleep
+  - pytest -v -s entrypoints/instrumentator
  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
  - pytest -v -s tool_use

--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -42,15 +42,13 @@ steps:
  working_dir: "/vllm-workspace/tests"
  source_file_dependencies:
  - vllm/
-  - tests/tool_use
-  - tests/entrypoints/sleep
-  - tests/entrypoints/instrumentator
  - tests/entrypoints/rpc
+  - tests/entrypoints/instrumentator
+  - tests/tool_use
  commands:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
  - pytest -v -s entrypoints/instrumentator
-  - pytest -v -s entrypoints/sleep
+  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
  - pytest -v -s tool_use
 - label: Entrypoints Integration (Pooling)

--- a/tests/entrypoints/openai/test_basic.py
+++ b/tests/entrypoints/openai/test_basic.py
--- a/tests/entrypoints/openai/test_optional_middleware.py
+++ b/tests/entrypoints/openai/test_optional_middleware.py
--- a/tests/entrypoints/openai/test_orca_metrics.py
+++ b/tests/entrypoints/openai/test_orca_metrics.py
--- a/tests/entrypoints/sleep/test_sleep.py
+++ b/tests/entrypoints/sleep/test_sleep.py
--- a/tests/entrypoints/sleep/__init__.py
+++ b/tests/entrypoints/sleep/__init__.py
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -178,10 +178,6 @@ def build_app(
        app = FastAPI(lifespan=lifespan)
    app.state.args = args
-    from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers
-    register_basic_api_routers(app)
    from vllm.entrypoints.serve import register_vllm_serve_api_routers
    register_vllm_serve_api_routers(app)
@@ -205,6 +201,24 @@ def build_app(
        register_generate_api_routers(app)
+        from vllm.entrypoints.serve.disagg.api_router import (
+            attach_router as attach_disagg_router,
+        )
+        attach_disagg_router(app)
+        from vllm.entrypoints.serve.rlhf.api_router import (
+            attach_router as attach_rlhf_router,
+        )
+        attach_rlhf_router(app)
+        from vllm.entrypoints.serve.elastic_ep.api_router import (
+            attach_router as elastic_ep_attach_router,
+        )
+        elastic_ep_attach_router(app)
    if "transcription" in supported_tasks:
        from vllm.entrypoints.openai.speech_to_text.api_router import (
            attach_router as register_speech_to_text_api_router,

--- a/vllm/entrypoints/openai/basic/__init__.py
+++ b/vllm/entrypoints/openai/basic/__init__.py
--- a/vllm/entrypoints/sagemaker/api_router.py
+++ b/vllm/entrypoints/sagemaker/api_router.py
@@ -10,10 +10,10 @@ import pydantic
 from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request
 from fastapi.responses import JSONResponse, Response
-from vllm.entrypoints.openai.basic.api_router import base
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.engine.serving import OpenAIServing
 from vllm.entrypoints.openai.utils import validate_json_request
+from vllm.entrypoints.serve.instrumentator.basic import base
 from vllm.entrypoints.serve.instrumentator.health import health
 from vllm.tasks import POOLING_TASKS, SupportedTask

--- a/vllm/entrypoints/serve/__init__.py
+++ b/vllm/entrypoints/serve/__init__.py
@@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
    attach_lora_router(app)
-    from vllm.entrypoints.serve.elastic_ep.api_router import (
-        attach_router as attach_elastic_ep_router,
-    )
-    attach_elastic_ep_router(app)
    from vllm.entrypoints.serve.profile.api_router import (
        attach_router as attach_profile_router,
    )
@@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
    attach_tokenize_router(app)
-    from vllm.entrypoints.serve.disagg.api_router import (
+    from .instrumentator import register_instrumentator_api_routers
-        attach_router as attach_disagg_router,
-    )
-    attach_disagg_router(app)
-    from vllm.entrypoints.serve.rlhf.api_router import (
-        attach_router as attach_rlhf_router,
-    )
-    attach_rlhf_router(app)
-    from vllm.entrypoints.serve.instrumentator.metrics import (
-        attach_router as attach_metrics_router,
-    )
-    attach_metrics_router(app)
-    from vllm.entrypoints.serve.instrumentator.health import (
-        attach_router as attach_health_router,
-    )
-    attach_health_router(app)
-    from vllm.entrypoints.serve.instrumentator.offline_docs import (
-        attach_router as attach_offline_docs_router,
-    )
-    attach_offline_docs_router(app)
-    from vllm.entrypoints.serve.instrumentator.server_info import (
-        attach_router as attach_server_info_router,
-    )
-    attach_server_info_router(app)
+    register_instrumentator_api_routers(app)
--- a/vllm/entrypoints/serve/instrumentator/__init__.py
+++ b/vllm/entrypoints/serve/instrumentator/__init__.py
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from fastapi import FastAPI
+from vllm import envs
+def register_instrumentator_api_routers(app: FastAPI):
+    from .basic import router as basic_router
+    app.include_router(basic_router)
+    from .health import router as health_router
+    app.include_router(health_router)
+    from .metrics import attach_router as metrics_attach_router
+    metrics_attach_router(app)
+    from .offline_docs import attach_router as offline_docs_attach_router
+    offline_docs_attach_router(app)
+    if envs.VLLM_SERVER_DEV_MODE:
+        from .server_info import router as server_info_router
+        app.include_router(server_info_router)
--- a/vllm/entrypoints/openai/basic/api_router.py
+++ b/vllm/entrypoints/openai/basic/api_router.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from fastapi import APIRouter, FastAPI, Request
+from fastapi import APIRouter, Request
 from fastapi.responses import JSONResponse
 from vllm.engine.protocol import EngineClient
@@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request):
 async def show_version():
    ver = {"version": VLLM_VERSION}
    return JSONResponse(content=ver)
-def register_basic_api_routers(app: FastAPI):
-    app.include_router(router)
--- a/vllm/entrypoints/serve/instrumentator/health.py
+++ b/vllm/entrypoints/serve/instrumentator/health.py
@@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response:
        return Response(status_code=200)
    except EngineDeadError:
        return Response(status_code=503)
-def attach_router(app):
-    app.include_router(router)
--- a/vllm/entrypoints/serve/instrumentator/server_info.py
+++ b/vllm/entrypoints/serve/instrumentator/server_info.py
@@ -7,7 +7,7 @@ import functools
 from typing import Annotated, Literal
 import pydantic
-from fastapi import APIRouter, FastAPI, Query, Request
+from fastapi import APIRouter, Query, Request
 from fastapi.responses import JSONResponse
 import vllm.envs as envs
@@ -57,9 +57,3 @@ async def show_server_info(
        "system_env": await asyncio.to_thread(_get_system_env_info_cached),
    }
    return JSONResponse(content=server_info)
-def attach_router(app: FastAPI):
-    if not envs.VLLM_SERVER_DEV_MODE:
-        return
-    app.include_router(router)