Unverified commit bd835b03, authored by Ziqi Fan and committed by GitHub
Browse files

fix: change KVBM Connector build_connector_meta due to vLLM ToT change (#2887)


Signed-off-by: Ziqi Fan <ziqif@nvidia.com>
parent c8ecc402
...@@ -142,13 +142,22 @@ class KvConnectorLeader: ...@@ -142,13 +142,22 @@ class KvConnectorLeader:
scheduler_output.scheduled_cached_reqs.new_block_ids, scheduler_output.scheduled_cached_reqs.new_block_ids,
scheduler_output.scheduled_cached_reqs.num_computed_tokens, scheduler_output.scheduled_cached_reqs.num_computed_tokens,
): ):
output.add_cached_request( if new_block_ids is not None:
request_id=req_id, output.add_cached_request(
resumed_from_preemption=resumed_from_preemption, request_id=req_id,
new_token_ids=new_token_ids, resumed_from_preemption=resumed_from_preemption,
new_block_ids=new_block_ids[0], new_token_ids=new_token_ids,
num_computed_tokens=num_computed_tokens, new_block_ids=new_block_ids[0],
) num_computed_tokens=num_computed_tokens,
)
else:
output.add_cached_request(
request_id=req_id,
resumed_from_preemption=resumed_from_preemption,
new_token_ids=new_token_ids,
new_block_ids=[],
num_computed_tokens=num_computed_tokens,
)
output.add_num_scheduled_tokens(scheduler_output.num_scheduled_tokens) output.add_num_scheduled_tokens(scheduler_output.num_scheduled_tokens)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment