Unverified commit ffd03a9b, authored by Simo Lin, committed by GitHub
Browse files

[router] fix get load response parsing (#11213)

parent 666da3d5
......@@ -141,7 +141,17 @@ def create_app(args: argparse.Namespace) -> FastAPI:
# GET /get_load handler: calls check_api_key(request), then responds with load
# figures derived from the _inflight counter.
# NOTE(review): presumably part of a mock/test worker, since the comment below
# speaks of "matching real workers" — confirm against the full file.
@app.get("/get_load")
async def get_load(request: Request):
check_api_key(request)
# NOTE(review): this single-object return looks like the pre-change line of the
# diff (the hunk header grows the span from 7 to 17 lines). Read literally, it
# would make the array return below unreachable — confirm that only the array
# form exists in the committed file.
return JSONResponse({"load": _inflight})
# Return format matching real workers: array of load info per DP rank
return JSONResponse(
[
{
# A single entry is reported, with dp_rank fixed at 0.
"dp_rank": 0,
"num_reqs": _inflight,
"num_waiting_reqs": 0,
# num_tokens reuses the _inflight value; the Rust hunk on this page
# sums "num_tokens" across the array entries.
"num_tokens": _inflight,
}
]
)
def make_json_response(obj: dict, status_code: int = 200) -> JSONResponse:
resp = JSONResponse(obj, status_code=status_code)
......
......@@ -1252,11 +1252,22 @@ impl WorkerManager {
Ok(response) if response.status().is_success() => {
match response.json::<Value>().await {
Ok(json) => {
if let Some(load) = json.get("load").and_then(|v| v.as_i64()) {
debug!("Worker {} load: {}", url, load);
Some(load as isize)
// The /get_load endpoint returns an array of load info objects (one per DP rank)
// Each object has: {dp_rank, num_reqs, num_waiting_reqs, num_tokens}
if let Some(array) = json.as_array() {
let total_tokens: i64 = array
.iter()
.filter_map(|entry| {
entry.get("num_tokens").and_then(|v| v.as_i64())
})
.sum();
debug!("Worker {} load (total tokens): {}", url, total_tokens);
Some(total_tokens as isize)
} else {
warn!("Invalid load response from {}: {:?}", url, json);
warn!(
"Invalid load response from {}: expected array, got {:?}",
url, json
);
None
}
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment