launcher.py 4.38 KB
Newer Older
1
2
# SPDX-License-Identifier: Apache-2.0

3
4
import asyncio
import signal
5
import socket
6
from http import HTTPStatus
7
from typing import Any, Optional
8
9

import uvicorn
10
from fastapi import FastAPI, Request, Response
11

12
13
from vllm import envs
from vllm.engine.async_llm_engine import AsyncEngineDeadError
14
from vllm.engine.multiprocessing import MQEngineDeadError
15
from vllm.entrypoints.ssl import SSLCertRefresher
16
from vllm.logger import init_logger
17
from vllm.utils import find_process_using_port
18
19
20
21

logger = init_logger(__name__)


22
23
24
async def serve_http(app: FastAPI,
                     sock: Optional[socket.socket],
                     enable_ssl_refresh: bool = False,
                     **uvicorn_kwargs: Any):
    """Serve ``app`` with uvicorn and return a coroutine that ends shutdown.

    Logs every route of ``app`` that exposes both ``methods`` and ``path``,
    builds a uvicorn server from ``uvicorn_kwargs``, registers the
    fatal-error handlers via ``_add_shutdown_handlers`` and runs the server
    in a task on the running event loop. SIGINT and SIGTERM are bound on
    that loop to a handler that cancels the server task.

    Args:
        app: The FastAPI application to serve.
        sock: Optional socket passed to ``server.serve``; when falsy, no
            sockets are passed.
        enable_ssl_refresh: When true, an ``SSLCertRefresher`` is created
            from the loaded uvicorn config's SSL context and key/cert/CA
            paths, and is stopped by the signal handler.
        **uvicorn_kwargs: Forwarded unchanged to ``uvicorn.Config``. The
            optional ``port`` entry is additionally used for a debug log
            on cancellation.

    Returns:
        An un-awaited coroutine for the caller to await: a no-op when the
        server task finished on its own, or ``server.shutdown()`` when the
        task was cancelled.
    """
    logger.info("Available routes are:")
    for route in app.routes:
        methods = getattr(route, "methods", None)
        path = getattr(route, "path", None)

        # Skip route objects that do not carry both attributes.
        if methods is None or path is None:
            continue

        logger.info("Route: %s, Methods: %s", path, ', '.join(methods))

    config = uvicorn.Config(app, **uvicorn_kwargs)
    config.load()
    server = uvicorn.Server(config)
    _add_shutdown_handlers(app, server)

    loop = asyncio.get_running_loop()

    server_task = loop.create_task(
        server.serve(sockets=[sock] if sock else None))

    # NOTE(review): the refresher is only stopped from the signal handler
    # below; confirm whether it should also be stopped when the server
    # task ends on its own.
    ssl_cert_refresher = None if not enable_ssl_refresh else SSLCertRefresher(
        ssl_context=config.ssl,
        key_path=config.ssl_keyfile,
        cert_path=config.ssl_certfile,
        ca_path=config.ssl_ca_certs)

    def signal_handler() -> None:
        # prevents the uvicorn signal handler to exit early
        server_task.cancel()
        if ssl_cert_refresher:
            ssl_cert_refresher.stop()

    async def dummy_shutdown() -> None:
        pass

    loop.add_signal_handler(signal.SIGINT, signal_handler)
    loop.add_signal_handler(signal.SIGTERM, signal_handler)

    try:
        await server_task
        # Keep the return shape uniform: callers always get a coroutine.
        return dummy_shutdown()
    except asyncio.CancelledError:
        # ``port`` is an optional keyword; indexing it directly would raise
        # KeyError here and replace the graceful shutdown below with a crash.
        port = uvicorn_kwargs.get("port")
        process = (find_process_using_port(port)
                   if port is not None else None)
        if process is not None:
            logger.debug(
                "port %s is used by process %s launched with command:\n%s",
                port, process, " ".join(process.cmdline()))
        logger.info("Shutting down FastAPI HTTP server.")
        return server.shutdown()
76
77


78
def _add_shutdown_handlers(app: FastAPI, server: uvicorn.Server) -> None:
    """Register exception handlers for errors that should stop the server.

    Every handler answers the failing request with HTTP 500. Unless
    ``envs.VLLM_KEEP_ALIVE_ON_ENGINE_DEATH`` is set, the handlers also set
    ``server.should_exit`` (the ``RuntimeError`` handler only does so when
    the engine client reports ``errored`` and not ``is_running``).
    """

    def _internal_error() -> Response:
        # All handlers reply with the same bare 500 response.
        return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)

    @app.exception_handler(RuntimeError)
    async def runtime_error_handler(request: Request, __):
        """A generic runtime error may mean the engine has died, in which
        case no further requests can be served. If the engine client
        confirms that, request a graceful server exit."""
        engine = request.app.state.engine_client
        keep_alive = envs.VLLM_KEEP_ALIVE_ON_ENGINE_DEATH
        engine_died = (not keep_alive and engine.errored
                       and not engine.is_running)

        if engine_died:
            logger.fatal(
                "AsyncLLMEngine has failed, terminating server process")
            # Background on shutting down a uvicorn server:
            # https://github.com/encode/uvicorn/discussions/1103
            # The shutdown cannot be awaited from inside this handler: the
            # handler has to return first so that the connection for this
            # request gets closed. Flag the server to exit instead.
            server.should_exit = True

        return _internal_error()

    @app.exception_handler(AsyncEngineDeadError)
    async def async_engine_dead_handler(_, __):
        """The async engine is already dead and cannot serve any further
        requests, so stop the server unless keep-alive is enabled."""
        if envs.VLLM_KEEP_ALIVE_ON_ENGINE_DEATH:
            return _internal_error()

        logger.fatal(
            "AsyncLLMEngine is already dead, terminating server process")
        server.should_exit = True
        return _internal_error()

    @app.exception_handler(MQEngineDeadError)
    async def mq_engine_dead_handler(_, __):
        """The mq engine is already dead and cannot serve any further
        requests, so stop the server unless keep-alive is enabled."""
        if envs.VLLM_KEEP_ALIVE_ON_ENGINE_DEATH:
            return _internal_error()

        logger.fatal(
            "MQLLMEngine is already dead, terminating server process")
        server.should_exit = True
        return _internal_error()