"vllm/vscode:/vscode.git/clone" did not exist on "e33ee23ee3cde5aa69912d9d03aa31421851662c"
api_server_async_engine.py 1.51 KB
Newer Older
1
2
3
4
5
6
7
8
9
"""vllm.entrypoints.api_server with some extra logging for testing."""
from typing import Any, Dict

import uvicorn
from fastapi.responses import JSONResponse, Response

import vllm.entrypoints.api_server
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
10
from vllm.utils import FlexibleArgumentParser
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

app = vllm.entrypoints.api_server.app


class AsyncLLMEngineWithStats(AsyncLLMEngine):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._num_aborts = 0

    async def abort(self, request_id: str) -> None:
        await super().abort(request_id)
        self._num_aborts += 1

    def testing_stats(self) -> Dict[str, Any]:
        return {"num_aborted_requests": self._num_aborts}


@app.get("/stats")
def stats() -> Response:
    """Get the statistics of the engine."""
    return JSONResponse(engine.testing_stats())


if __name__ == "__main__":
36
    parser = FlexibleArgumentParser()
37
38
39
40
41
42
    parser.add_argument("--host", type=str, default="localhost")
    parser.add_argument("--port", type=int, default=8000)
    parser = AsyncEngineArgs.add_cli_args(parser)
    args = parser.parse_args()

    engine_args = AsyncEngineArgs.from_cli_args(args)
43
    engine = AsyncLLMEngineWithStats.from_engine_args(engine_args)
44
45
46
47
48
49
50
    vllm.entrypoints.api_server.engine = engine
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level="debug",
        timeout_keep_alive=vllm.entrypoints.api_server.TIMEOUT_KEEP_ALIVE)