Unverified commit 578087e5, authored by Russell Bryant and committed by GitHub
Browse files

[Frontend] Pass pre-created socket to uvicorn (#13113)

parent fa253f1a
...@@ -127,6 +127,7 @@ async def run_server(args: Namespace, ...@@ -127,6 +127,7 @@ async def run_server(args: Namespace,
shutdown_task = await serve_http( shutdown_task = await serve_http(
app, app,
sock=None,
host=args.host, host=args.host,
port=args.port, port=args.port,
log_level=args.log_level, log_level=args.log_level,
......
...@@ -2,8 +2,9 @@ ...@@ -2,8 +2,9 @@
import asyncio import asyncio
import signal import signal
import socket
from http import HTTPStatus from http import HTTPStatus
from typing import Any from typing import Any, Optional
import uvicorn import uvicorn
from fastapi import FastAPI, Request, Response from fastapi import FastAPI, Request, Response
...@@ -17,7 +18,8 @@ from vllm.utils import find_process_using_port ...@@ -17,7 +18,8 @@ from vllm.utils import find_process_using_port
logger = init_logger(__name__) logger = init_logger(__name__)
async def serve_http(app: FastAPI, **uvicorn_kwargs: Any): async def serve_http(app: FastAPI, sock: Optional[socket.socket],
**uvicorn_kwargs: Any):
logger.info("Available routes are:") logger.info("Available routes are:")
for route in app.routes: for route in app.routes:
methods = getattr(route, "methods", None) methods = getattr(route, "methods", None)
...@@ -34,7 +36,8 @@ async def serve_http(app: FastAPI, **uvicorn_kwargs: Any): ...@@ -34,7 +36,8 @@ async def serve_http(app: FastAPI, **uvicorn_kwargs: Any):
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
server_task = loop.create_task(server.serve()) server_task = loop.create_task(
server.serve(sockets=[sock] if sock else None))
def signal_handler() -> None: def signal_handler() -> None:
# prevents the uvicorn signal handler to exit early # prevents the uvicorn signal handler to exit early
......
...@@ -10,7 +10,6 @@ import os ...@@ -10,7 +10,6 @@ import os
import re import re
import signal import signal
import socket import socket
import sys
import tempfile import tempfile
import uuid import uuid
from argparse import Namespace from argparse import Namespace
...@@ -831,6 +830,7 @@ def create_server_socket(addr: Tuple[str, int]) -> socket.socket: ...@@ -831,6 +830,7 @@ def create_server_socket(addr: Tuple[str, int]) -> socket.socket:
sock = socket.socket(family=family, type=socket.SOCK_STREAM) sock = socket.socket(family=family, type=socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
sock.bind(addr) sock.bind(addr)
return sock return sock
...@@ -878,8 +878,17 @@ async def run_server(args, **uvicorn_kwargs) -> None: ...@@ -878,8 +878,17 @@ async def run_server(args, **uvicorn_kwargs) -> None:
model_config = await engine_client.get_model_config() model_config = await engine_client.get_model_config()
await init_app_state(engine_client, model_config, app.state, args) await init_app_state(engine_client, model_config, app.state, args)
def _listen_addr(a: str) -> str:
    """Return host ``a`` in the form used when logging the server URL.

    A host accepted by ``is_valid_ipv6_address`` is wrapped in square
    brackets; an empty host is shown as ``0.0.0.0``; any other host is
    returned unchanged.
    """
    if not is_valid_ipv6_address(a):
        # Falsy (empty) host falls back to the wildcard IPv4 address.
        return a if a else "0.0.0.0"
    return f"[{a}]"
logger.info("Starting vLLM API server on http://%s:%d",
_listen_addr(sock_addr[0]), sock_addr[1])
shutdown_task = await serve_http( shutdown_task = await serve_http(
app, app,
sock=sock,
host=args.host, host=args.host,
port=args.port, port=args.port,
log_level=args.uvicorn_log_level, log_level=args.uvicorn_log_level,
...@@ -888,8 +897,6 @@ async def run_server(args, **uvicorn_kwargs) -> None: ...@@ -888,8 +897,6 @@ async def run_server(args, **uvicorn_kwargs) -> None:
ssl_certfile=args.ssl_certfile, ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs, ssl_ca_certs=args.ssl_ca_certs,
ssl_cert_reqs=args.ssl_cert_reqs, ssl_cert_reqs=args.ssl_cert_reqs,
# Workaround to work on macOS
fd=sock.fileno() if sys.platform.startswith("darwin") else None,
**uvicorn_kwargs, **uvicorn_kwargs,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment