Unverified Commit e5b29bf1 authored by Xuchun Shang's avatar Xuchun Shang Committed by GitHub
Browse files

[PD] Support get_model_info interface for mini_lb (#9792)


Signed-off-by: Xuchun Shang <xuchun.shang@linux.alibaba.com>
Co-authored-by: Teng Ma <sima.mt@alibaba-inc.com>
parent 9a7c8842
...@@ -7,6 +7,7 @@ import dataclasses ...@@ -7,6 +7,7 @@ import dataclasses
import logging import logging
import random import random
import urllib import urllib
from http import HTTPStatus
from itertools import chain from itertools import chain
from typing import List, Optional from typing import List, Optional
...@@ -262,14 +263,38 @@ async def get_server_info(): ...@@ -262,14 +263,38 @@ async def get_server_info():
@app.get("/get_model_info")
async def get_model_info():
    """Return model info by proxying the request to the first prefill server.

    The request is forwarded to ``<prefill_servers[0]>/get_model_info`` and
    the backend's JSON body is returned unchanged.

    Raises:
        HTTPException: 503 (SERVICE_UNAVAILABLE) if no load balancer or no
            prefill server is registered, or if the backend request fails
            with an aiohttp client error or times out; 502 (BAD_GATEWAY) if
            the backend answers with a non-200 status.
    """
    global load_balancer

    # Local import so this handler does not depend on a module-level
    # ``import asyncio``; aiohttp reports request timeouts as
    # asyncio.TimeoutError, which is not an aiohttp.ClientError.
    import asyncio

    if not load_balancer or not load_balancer.prefill_servers:
        raise HTTPException(
            status_code=HTTPStatus.SERVICE_UNAVAILABLE,
            detail="There is no server registered",
        )

    target_server_url = load_balancer.prefill_servers[0]
    endpoint_url = f"{target_server_url}/get_model_info"

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(endpoint_url) as response:
                if response.status != HTTPStatus.OK:
                    error_text = await response.text()
                    # HTTPException is not caught by the handler below, so it
                    # propagates to the framework as a 502.
                    raise HTTPException(
                        status_code=HTTPStatus.BAD_GATEWAY,
                        detail=(
                            f"Failed to get model info from {target_server_url}. "
                            f"Status: {response.status}, Response: {error_text}"
                        ),
                    )
                model_info_json = await response.json()
                return ORJSONResponse(content=model_info_json)
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # Chain the cause so the underlying transport error stays visible
            # in tracebacks/logs while the client gets a generic 503.
            raise HTTPException(
                status_code=HTTPStatus.SERVICE_UNAVAILABLE,
                detail="Failed to get model info from backend",
            ) from e
@app.post("/generate") @app.post("/generate")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment