Commit dab1de9f (unverified), authored by wang.yuqi, committed by GitHub
Browse files

[Frontend][CI] Consolidate instrumentator entrypoints (#34123)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent 8d48d0a9
......@@ -132,7 +132,7 @@ steps:
- tests/entrypoints/
commands:
- pytest -v -s entrypoints/openai/tool_parsers
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- label: Entrypoints Integration Test (LLM) # 30min
timeout_in_minutes: 40
......@@ -179,14 +179,14 @@ steps:
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/entrypoints/sleep
- tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/sleep
- pytest -v -s tool_use
- pytest -v -s entrypoints/instrumentator
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use
- label: Entrypoints Integration Test (Pooling)
timeout_in_minutes: 50
......
......@@ -118,7 +118,7 @@ steps:
- tests/entrypoints/
commands:
- pytest -v -s entrypoints/openai/tool_parsers
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- label: Entrypoints Integration Test (LLM) # 30min
timeout_in_minutes: 40
......@@ -148,7 +148,7 @@ steps:
- tests/entrypoints/test_chat_utils
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/instrumentator --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/test_chat_utils.py
- label: Entrypoints Integration Test (API Server 2)
......@@ -159,12 +159,12 @@ steps:
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/entrypoints/sleep
- tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/sleep
- pytest -v -s entrypoints/instrumentator
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use
......
......@@ -42,15 +42,13 @@ steps:
working_dir: "/vllm-workspace/tests"
source_file_dependencies:
- vllm/
- tests/tool_use
- tests/entrypoints/sleep
- tests/entrypoints/instrumentator
- tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s entrypoints/instrumentator
- pytest -v -s entrypoints/sleep
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use
- label: Entrypoints Integration (Pooling)
......
......@@ -178,10 +178,6 @@ def build_app(
app = FastAPI(lifespan=lifespan)
app.state.args = args
from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers
register_basic_api_routers(app)
from vllm.entrypoints.serve import register_vllm_serve_api_routers
register_vllm_serve_api_routers(app)
......@@ -205,6 +201,24 @@ def build_app(
register_generate_api_routers(app)
from vllm.entrypoints.serve.disagg.api_router import (
attach_router as attach_disagg_router,
)
attach_disagg_router(app)
from vllm.entrypoints.serve.rlhf.api_router import (
attach_router as attach_rlhf_router,
)
attach_rlhf_router(app)
from vllm.entrypoints.serve.elastic_ep.api_router import (
attach_router as elastic_ep_attach_router,
)
elastic_ep_attach_router(app)
if "transcription" in supported_tasks:
from vllm.entrypoints.openai.speech_to_text.api_router import (
attach_router as register_speech_to_text_api_router,
......
......@@ -10,10 +10,10 @@ import pydantic
from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, Response
from vllm.entrypoints.openai.basic.api_router import base
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.utils import validate_json_request
from vllm.entrypoints.serve.instrumentator.basic import base
from vllm.entrypoints.serve.instrumentator.health import health
from vllm.tasks import POOLING_TASKS, SupportedTask
......
......@@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
attach_lora_router(app)
from vllm.entrypoints.serve.elastic_ep.api_router import (
attach_router as attach_elastic_ep_router,
)
attach_elastic_ep_router(app)
from vllm.entrypoints.serve.profile.api_router import (
attach_router as attach_profile_router,
)
......@@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
attach_tokenize_router(app)
from vllm.entrypoints.serve.disagg.api_router import (
attach_router as attach_disagg_router,
)
attach_disagg_router(app)
from vllm.entrypoints.serve.rlhf.api_router import (
attach_router as attach_rlhf_router,
)
attach_rlhf_router(app)
from vllm.entrypoints.serve.instrumentator.metrics import (
attach_router as attach_metrics_router,
)
attach_metrics_router(app)
from vllm.entrypoints.serve.instrumentator.health import (
attach_router as attach_health_router,
)
attach_health_router(app)
from vllm.entrypoints.serve.instrumentator.offline_docs import (
attach_router as attach_offline_docs_router,
)
attach_offline_docs_router(app)
from vllm.entrypoints.serve.instrumentator.server_info import (
attach_router as attach_server_info_router,
)
from .instrumentator import register_instrumentator_api_routers
attach_server_info_router(app)
register_instrumentator_api_routers(app)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from fastapi import FastAPI
from vllm import envs
def register_instrumentator_api_routers(app: FastAPI):
    """Register the instrumentator routers on ``app``.

    The basic, health, metrics and offline-docs routers are always
    registered, in that order. The server-info router is added last, and
    only when ``envs.VLLM_SERVER_DEV_MODE`` is truthy.

    Each submodule is imported inside the function, immediately before it
    is used, so the import/registration sequence below is significant.

    Args:
        app: The FastAPI application the routers are added to.
    """
    from .basic import router as basic_routes

    app.include_router(basic_routes)

    from .health import router as health_routes

    app.include_router(health_routes)

    # metrics and offline_docs are wired up through their own
    # ``attach_router(app)`` functions instead of ``include_router``.
    from .metrics import attach_router as attach_metrics

    attach_metrics(app)

    from .offline_docs import attach_router as attach_offline_docs

    attach_offline_docs(app)

    # Outside dev mode the server_info module is never imported.
    if not envs.VLLM_SERVER_DEV_MODE:
        return

    from .server_info import router as server_info_routes

    app.include_router(server_info_routes)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from fastapi import APIRouter, FastAPI, Request
from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse
from vllm.engine.protocol import EngineClient
......@@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request):
async def show_version():
ver = {"version": VLLM_VERSION}
return JSONResponse(content=ver)
def register_basic_api_routers(app: FastAPI):
    """Include this module's ``router`` in the given FastAPI ``app``."""
    app.include_router(router)
......@@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response:
return Response(status_code=200)
except EngineDeadError:
return Response(status_code=503)
def attach_router(app):
    """Include this module's ``router`` in ``app``."""
    app.include_router(router)
......@@ -7,7 +7,7 @@ import functools
from typing import Annotated, Literal
import pydantic
from fastapi import APIRouter, FastAPI, Query, Request
from fastapi import APIRouter, Query, Request
from fastapi.responses import JSONResponse
import vllm.envs as envs
......@@ -57,9 +57,3 @@ async def show_server_info(
"system_env": await asyncio.to_thread(_get_system_env_info_cached),
}
return JSONResponse(content=server_info)
def attach_router(app: FastAPI):
    """Include this module's ``router`` in ``app`` when dev mode is on.

    Nothing is registered unless ``envs.VLLM_SERVER_DEV_MODE`` is truthy.
    """
    if envs.VLLM_SERVER_DEV_MODE:
        app.include_router(router)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment