# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from http import HTTPStatus from fastapi import APIRouter, Depends, FastAPI, Request from fastapi.responses import JSONResponse from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest from vllm.entrypoints.openai.completion.protocol import CompletionRequest from vllm.entrypoints.openai.engine.protocol import ErrorResponse from vllm.entrypoints.openai.utils import validate_json_request from vllm.entrypoints.serve.render.serving import OpenAIServingRender from vllm.logger import init_logger logger = init_logger(__name__) router = APIRouter() def render(request: Request) -> OpenAIServingRender | None: return getattr(request.app.state, "openai_serving_render", None) @router.post( "/v1/chat/completions/render", dependencies=[Depends(validate_json_request)], response_model=list, responses={ HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse}, HTTPStatus.NOT_FOUND.value: {"model": ErrorResponse}, HTTPStatus.NOT_IMPLEMENTED.value: {"model": ErrorResponse}, HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse}, }, ) async def render_chat_completion(request: ChatCompletionRequest, raw_request: Request): handler = render(raw_request) if handler is None: raise NotImplementedError( "The model does not support Chat Completions Render API" ) result = await handler.render_chat_request(request) if isinstance(result, ErrorResponse): return JSONResponse(content=result.model_dump(), status_code=result.error.code) return JSONResponse(content=result) @router.post( "/v1/completions/render", dependencies=[Depends(validate_json_request)], response_model=list, responses={ HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse}, HTTPStatus.NOT_FOUND.value: {"model": ErrorResponse}, HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse}, }, ) async def render_completion(request: CompletionRequest, raw_request: Request): handler = render(raw_request) if handler is None: raise NotImplementedError("The model does not support Completions Render API") result = await handler.render_completion_request(request) if isinstance(result, ErrorResponse): return JSONResponse(content=result.model_dump(), status_code=result.error.code) return JSONResponse(content=result) def attach_router(app: FastAPI) -> None: app.include_router(router)