Commit 4b4fc875 authored by lizhigong's avatar lizhigong
Browse files

debug and fix error

parent 4829dd84
......@@ -165,6 +165,7 @@ class Worker(WorkerBase):
# Construct the model runner
if envs.VLLM_ZERO_OVERHEAD:
logger.info('use zero overhead model_runner')
self.model_runner: GPUModelRunner = V1ZeroModelRunner(
self.vllm_config, self.device)
else:
......
......@@ -178,6 +178,8 @@ def zero_overhead_update_from_output(scheduler:Scheduler,
# expensive operations inside the loop.
for request in scheduler.running:
if request.is_finished():
if req_id in requsets_valid_token_len:
requsets_valid_token_len.pop(req_id)
continue
req_id = request.request_id
num_tokens_scheduled = num_scheduled_tokens.get(req_id, 0)
......@@ -212,6 +214,7 @@ def zero_overhead_update_from_output(scheduler:Scheduler,
if request.has_encoder_inputs:
scheduler._free_encoder_inputs(request)
stopped = False
new_logprobs = None
new_token_ids = generated_token_ids
kv_transfer_params = None
......@@ -292,8 +295,10 @@ def zero_overhead_update_from_output(scheduler:Scheduler,
kv_transfer_params=kv_transfer_params,
num_cached_tokens=request.num_cached_tokens,
))
if not stopped:
if stopped:
if req_id in requsets_valid_token_len:
requsets_valid_token_len.pop(req_id)
else:
new_running.append(request)
scheduler.running = new_running
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment