Unverified commit 720b10fd, authored by Robert Shaw, committed by GitHub
Browse files

[1/N] API Server (Remove Proxy) (#11529)

parent b85a9778
...@@ -585,9 +585,15 @@ def build_app(args: Namespace) -> FastAPI: ...@@ -585,9 +585,15 @@ def build_app(args: Namespace) -> FastAPI:
status_code=401) status_code=401)
return await call_next(request) return await call_next(request)
if args.enable_request_id_headers:
logger.warning(
"CAUTION: Enabling X-Request-Id headers in the API Server. "
"This can harm performance at high QPS.")
@app.middleware("http") @app.middleware("http")
async def add_request_id(request: Request, call_next): async def add_request_id(request: Request, call_next):
request_id = request.headers.get("X-Request-Id") or uuid.uuid4().hex request_id = request.headers.get(
"X-Request-Id") or uuid.uuid4().hex
response = await call_next(request) response = await call_next(request)
response.headers["X-Request-Id"] = request_id response.headers["X-Request-Id"] = request_id
return response return response
......
...@@ -196,7 +196,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: ...@@ -196,7 +196,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
action="store_true", action="store_true",
help="If specified, will run the OpenAI frontend server in the same " help="If specified, will run the OpenAI frontend server in the same "
"process as the model serving engine.") "process as the model serving engine.")
parser.add_argument(
"--enable-request-id-headers",
action="store_true",
help="If specified, API server will add X-Request-Id header to "
"responses. Caution: this hurts performance at high QPS.")
parser.add_argument( parser.add_argument(
"--enable-auto-tool-choice", "--enable-auto-tool-choice",
action="store_true", action="store_true",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment