Unverified commit f7890bf0, authored by Hongkuan Zhou, committed by GitHub
Browse files

fix: Fix vllm v0 None*int error when not using kv aware router (#1304)

parent c939da0c
...@@ -212,7 +212,8 @@ class VllmWorker: ...@@ -212,7 +212,8 @@ class VllmWorker:
prefill_queue_size = await prefill_queue.get_queue_size() prefill_queue_size = await prefill_queue.get_queue_size()
disagg_router_decision = await self.disaggregated_router.prefill_remote( disagg_router_decision = await self.disaggregated_router.prefill_remote(
len(request.token_ids), len(request.token_ids),
request.estimated_prefix_hit_num_blocks * self.engine_args.block_size, (request.estimated_prefix_hit_num_blocks or 0)
* self.engine_args.block_size,
prefill_queue_size, prefill_queue_size,
) )
else: else:
...@@ -230,7 +231,7 @@ class VllmWorker: ...@@ -230,7 +231,7 @@ class VllmWorker:
else: else:
remote_prefill_params = None remote_prefill_params = None
logger.info( logger.info(
f"Prefilling locally for request {request_id} with length {len(request.token_ids)} (estimated prefix hit length {request.estimated_prefix_hit_num_blocks * self.engine_args.block_size})" f"Prefilling locally for request {request_id} with length {len(request.token_ids)} (estimated prefix hit length {(request.estimated_prefix_hit_num_blocks or 0) * self.engine_args.block_size})"
) )
sampling_params = SamplingParams(**self.default_sampling_params) sampling_params = SamplingParams(**self.default_sampling_params)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment