Unverified Commit d88bff1b authored by cjackal's avatar cjackal Committed by GitHub
Browse files

[Frontend] add `add_request_id` middleware (#9594)


Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com>
parent 9e372664
...@@ -62,6 +62,32 @@ completion = client.chat.completions.create( ...@@ -62,6 +62,32 @@ completion = client.chat.completions.create(
) )
``` ```
### Extra HTTP Headers
Only the `X-Request-Id` HTTP request header is supported for now.
```python
# Chat completion request that supplies its own request ID through the
# `x-request-id` header, passed via the `extra_headers` argument.
completion = client.chat.completions.create(
model="NousResearch/Meta-Llama-3-8B-Instruct",
messages=[
{"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
],
extra_headers={
"x-request-id": "sentiment-classification-00001",
}
)
# Print the request ID recorded on the returned object.
# NOTE(review): presumably this matches the ID sent above -- confirm against
# the client library's documentation.
print(completion._request_id)
# Same pattern for the plain completions endpoint.
completion = client.completions.create(
model="NousResearch/Meta-Llama-3-8B-Instruct",
prompt="A robot may not injure a human being",
extra_headers={
"x-request-id": "completion-test",
}
)
print(completion._request_id)
```
### Extra Parameters for Completions API ### Extra Parameters for Completions API
The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported. The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported.
......
...@@ -7,6 +7,7 @@ import re ...@@ -7,6 +7,7 @@ import re
import signal import signal
import socket import socket
import tempfile import tempfile
import uuid
from argparse import Namespace from argparse import Namespace
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from functools import partial from functools import partial
...@@ -475,6 +476,13 @@ def build_app(args: Namespace) -> FastAPI: ...@@ -475,6 +476,13 @@ def build_app(args: Namespace) -> FastAPI:
status_code=401) status_code=401)
return await call_next(request) return await call_next(request)
@app.middleware("http")
async def add_request_id(request: Request, call_next):
    """Attach an ``X-Request-Id`` header to every response.

    The value is taken from the incoming request's ``X-Request-Id`` header
    when that header is present and non-empty; otherwise a random UUID4 hex
    string is generated.

    Args:
        request: The incoming HTTP request.
        call_next: Callable that forwards the request down the chain and
            returns the response.

    Returns:
        The downstream response with its ``X-Request-Id`` header set.
    """
    supplied_id = request.headers.get("X-Request-Id")
    # Resolve the ID before dispatching, so a fallback ID is generated up
    # front rather than after the handler has run.
    if supplied_id:
        resolved_id = supplied_id
    else:
        resolved_id = uuid.uuid4().hex
    downstream_response = await call_next(request)
    downstream_response.headers["X-Request-Id"] = resolved_id
    return downstream_response
for middleware in args.middleware: for middleware in args.middleware:
module_path, object_name = middleware.rsplit(".", 1) module_path, object_name = middleware.rsplit(".", 1)
imported = getattr(importlib.import_module(module_path), object_name) imported = getattr(importlib.import_module(module_path), object_name)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment