"vscode:/vscode.git/clone" did not exist on "71df2a57effc15b5f67cdbf55f3d1e1b71f90e86"
Unverified commit dab1de9f, authored by wang.yuqi and committed by GitHub
Browse files

[Frontend][CI] Consolidate instrumentator entrypoints (#34123)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent 8d48d0a9
...@@ -132,7 +132,7 @@ steps: ...@@ -132,7 +132,7 @@ steps:
- tests/entrypoints/ - tests/entrypoints/
commands: commands:
- pytest -v -s entrypoints/openai/tool_parsers - pytest -v -s entrypoints/openai/tool_parsers
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- label: Entrypoints Integration Test (LLM) # 30min - label: Entrypoints Integration Test (LLM) # 30min
timeout_in_minutes: 40 timeout_in_minutes: 40
...@@ -179,14 +179,14 @@ steps: ...@@ -179,14 +179,14 @@ steps:
torch_nightly: true torch_nightly: true
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/entrypoints/sleep
- tests/entrypoints/rpc - tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use - tests/tool_use
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/sleep - pytest -v -s entrypoints/instrumentator
- pytest -v -s tool_use
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use
- label: Entrypoints Integration Test (Pooling) - label: Entrypoints Integration Test (Pooling)
timeout_in_minutes: 50 timeout_in_minutes: 50
......
...@@ -118,7 +118,7 @@ steps: ...@@ -118,7 +118,7 @@ steps:
- tests/entrypoints/ - tests/entrypoints/
commands: commands:
- pytest -v -s entrypoints/openai/tool_parsers - pytest -v -s entrypoints/openai/tool_parsers
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- label: Entrypoints Integration Test (LLM) # 30min - label: Entrypoints Integration Test (LLM) # 30min
timeout_in_minutes: 40 timeout_in_minutes: 40
...@@ -148,7 +148,7 @@ steps: ...@@ -148,7 +148,7 @@ steps:
- tests/entrypoints/test_chat_utils - tests/entrypoints/test_chat_utils
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/instrumentator --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/test_chat_utils.py - pytest -v -s entrypoints/test_chat_utils.py
- label: Entrypoints Integration Test (API Server 2) - label: Entrypoints Integration Test (API Server 2)
...@@ -159,12 +159,12 @@ steps: ...@@ -159,12 +159,12 @@ steps:
torch_nightly: true torch_nightly: true
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/entrypoints/sleep
- tests/entrypoints/rpc - tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use - tests/tool_use
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/sleep - pytest -v -s entrypoints/instrumentator
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use - pytest -v -s tool_use
......
...@@ -42,15 +42,13 @@ steps: ...@@ -42,15 +42,13 @@ steps:
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/tool_use
- tests/entrypoints/sleep
- tests/entrypoints/instrumentator
- tests/entrypoints/rpc - tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use
commands: commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_WORKER_MULTIPROC_METHOD=spawn
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s entrypoints/instrumentator - pytest -v -s entrypoints/instrumentator
- pytest -v -s entrypoints/sleep - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use - pytest -v -s tool_use
- label: Entrypoints Integration (Pooling) - label: Entrypoints Integration (Pooling)
......
...@@ -178,10 +178,6 @@ def build_app( ...@@ -178,10 +178,6 @@ def build_app(
app = FastAPI(lifespan=lifespan) app = FastAPI(lifespan=lifespan)
app.state.args = args app.state.args = args
from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers
register_basic_api_routers(app)
from vllm.entrypoints.serve import register_vllm_serve_api_routers from vllm.entrypoints.serve import register_vllm_serve_api_routers
register_vllm_serve_api_routers(app) register_vllm_serve_api_routers(app)
...@@ -205,6 +201,24 @@ def build_app( ...@@ -205,6 +201,24 @@ def build_app(
register_generate_api_routers(app) register_generate_api_routers(app)
from vllm.entrypoints.serve.disagg.api_router import (
attach_router as attach_disagg_router,
)
attach_disagg_router(app)
from vllm.entrypoints.serve.rlhf.api_router import (
attach_router as attach_rlhf_router,
)
attach_rlhf_router(app)
from vllm.entrypoints.serve.elastic_ep.api_router import (
attach_router as elastic_ep_attach_router,
)
elastic_ep_attach_router(app)
if "transcription" in supported_tasks: if "transcription" in supported_tasks:
from vllm.entrypoints.openai.speech_to_text.api_router import ( from vllm.entrypoints.openai.speech_to_text.api_router import (
attach_router as register_speech_to_text_api_router, attach_router as register_speech_to_text_api_router,
......
...@@ -10,10 +10,10 @@ import pydantic ...@@ -10,10 +10,10 @@ import pydantic
from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, Response from fastapi.responses import JSONResponse, Response
from vllm.entrypoints.openai.basic.api_router import base
from vllm.entrypoints.openai.engine.protocol import ErrorResponse from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.engine.serving import OpenAIServing from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.utils import validate_json_request from vllm.entrypoints.openai.utils import validate_json_request
from vllm.entrypoints.serve.instrumentator.basic import base
from vllm.entrypoints.serve.instrumentator.health import health from vllm.entrypoints.serve.instrumentator.health import health
from vllm.tasks import POOLING_TASKS, SupportedTask from vllm.tasks import POOLING_TASKS, SupportedTask
......
...@@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI): ...@@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
attach_lora_router(app) attach_lora_router(app)
from vllm.entrypoints.serve.elastic_ep.api_router import (
attach_router as attach_elastic_ep_router,
)
attach_elastic_ep_router(app)
from vllm.entrypoints.serve.profile.api_router import ( from vllm.entrypoints.serve.profile.api_router import (
attach_router as attach_profile_router, attach_router as attach_profile_router,
) )
...@@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI): ...@@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
attach_tokenize_router(app) attach_tokenize_router(app)
from vllm.entrypoints.serve.disagg.api_router import ( from .instrumentator import register_instrumentator_api_routers
attach_router as attach_disagg_router,
)
attach_disagg_router(app)
from vllm.entrypoints.serve.rlhf.api_router import (
attach_router as attach_rlhf_router,
)
attach_rlhf_router(app)
from vllm.entrypoints.serve.instrumentator.metrics import (
attach_router as attach_metrics_router,
)
attach_metrics_router(app)
from vllm.entrypoints.serve.instrumentator.health import (
attach_router as attach_health_router,
)
attach_health_router(app)
from vllm.entrypoints.serve.instrumentator.offline_docs import (
attach_router as attach_offline_docs_router,
)
attach_offline_docs_router(app)
from vllm.entrypoints.serve.instrumentator.server_info import (
attach_router as attach_server_info_router,
)
attach_server_info_router(app) register_instrumentator_api_routers(app)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from fastapi import FastAPI
from vllm import envs
def register_instrumentator_api_routers(app: FastAPI):
    """Mount the instrumentator endpoints on ``app``.

    The basic and health routers are included directly, while the metrics
    and offline-docs modules are wired up through their own
    ``attach_router`` helpers. The server-info router is included only when
    ``envs.VLLM_SERVER_DEV_MODE`` is truthy.

    Each submodule is imported inside the function, immediately before it
    is used. Registration order is: basic, health, metrics, offline docs,
    server info.
    """
    from .basic import router as basic_routes

    app.include_router(basic_routes)

    from .health import router as health_routes

    app.include_router(health_routes)

    from .metrics import attach_router as attach_metrics

    attach_metrics(app)

    from .offline_docs import attach_router as attach_offline_docs

    attach_offline_docs(app)

    # The server-info routes are registered in dev mode only.
    if not envs.VLLM_SERVER_DEV_MODE:
        return

    from .server_info import router as server_info_routes

    app.include_router(server_info_routes)
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from fastapi import APIRouter, FastAPI, Request from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
...@@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request): ...@@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request):
async def show_version(): async def show_version():
ver = {"version": VLLM_VERSION} ver = {"version": VLLM_VERSION}
return JSONResponse(content=ver) return JSONResponse(content=ver)
def register_basic_api_routers(app: FastAPI):
app.include_router(router)
...@@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response: ...@@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response:
return Response(status_code=200) return Response(status_code=200)
except EngineDeadError: except EngineDeadError:
return Response(status_code=503) return Response(status_code=503)
def attach_router(app):
app.include_router(router)
...@@ -7,7 +7,7 @@ import functools ...@@ -7,7 +7,7 @@ import functools
from typing import Annotated, Literal from typing import Annotated, Literal
import pydantic import pydantic
from fastapi import APIRouter, FastAPI, Query, Request from fastapi import APIRouter, Query, Request
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
import vllm.envs as envs import vllm.envs as envs
...@@ -57,9 +57,3 @@ async def show_server_info( ...@@ -57,9 +57,3 @@ async def show_server_info(
"system_env": await asyncio.to_thread(_get_system_env_info_cached), "system_env": await asyncio.to_thread(_get_system_env_info_cached),
} }
return JSONResponse(content=server_info) return JSONResponse(content=server_info)
def attach_router(app: FastAPI):
if not envs.VLLM_SERVER_DEV_MODE:
return
app.include_router(router)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment