Unverified commit 6242c399, authored by Lucien, committed by GitHub
Browse files

Generate 1 token to verify the health of the inference service in /health (#1154)


Co-authored-by: Yineng Zhang <me@zhyncs.com>
parent 04707b09
...@@ -89,6 +89,23 @@ app = FastAPI() ...@@ -89,6 +89,23 @@ app = FastAPI()
tokenizer_manager = None tokenizer_manager = None
@app.get("/v1/health")
async def health(request: Request) -> Response:
    """Probe the inference service by asking it to generate a single token.

    Responds with HTTP 200 once the generation stream produces its first
    item (or finishes without raising), and with HTTP 503 after logging the
    error if any exception is raised along the way.
    """
    sampling_params = {"max_new_tokens": 1, "temperature": 0.7}
    probe = GenerateReqInput(text="s", sampling_params=sampling_params)
    try:
        # Only the first streamed result matters; stop iterating right away.
        async for _first in tokenizer_manager.generate_request(probe, request):
            break
        return Response(status_code=200)
    except Exception as exc:
        logger.exception(exc)
        return Response(status_code=503)
@app.get("/health") @app.get("/health")
async def health() -> Response: async def health() -> Response:
"""Health check.""" """Health check."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment