Commit e70b0ea0 authored by zhuwenwen
Browse files

debug and fix error

parent 1555157e
...@@ -189,6 +189,7 @@ class Worker(WorkerBase): ...@@ -189,6 +189,7 @@ class Worker(WorkerBase):
# Construct the model runner # Construct the model runner
if envs.VLLM_ZERO_OVERHEAD: if envs.VLLM_ZERO_OVERHEAD:
logger.info('use zero overhead model_runner')
self.model_runner: GPUModelRunner = V1ZeroModelRunner( self.model_runner: GPUModelRunner = V1ZeroModelRunner(
self.vllm_config, self.device) self.vllm_config, self.device)
else: else:
......
...@@ -175,6 +175,8 @@ def zero_overhead_update_from_output(scheduler:Scheduler, ...@@ -175,6 +175,8 @@ def zero_overhead_update_from_output(scheduler:Scheduler,
# expensive operations inside the loop. # expensive operations inside the loop.
for request in scheduler.running: for request in scheduler.running:
if request.is_finished(): if request.is_finished():
if req_id in requsets_valid_token_len:
requsets_valid_token_len.pop(req_id)
continue continue
req_id = request.request_id req_id = request.request_id
num_tokens_scheduled = num_scheduled_tokens.get(req_id, 0) num_tokens_scheduled = num_scheduled_tokens.get(req_id, 0)
...@@ -209,6 +211,7 @@ def zero_overhead_update_from_output(scheduler:Scheduler, ...@@ -209,6 +211,7 @@ def zero_overhead_update_from_output(scheduler:Scheduler,
if request.has_encoder_inputs: if request.has_encoder_inputs:
scheduler._free_encoder_inputs(request) scheduler._free_encoder_inputs(request)
stopped = False
new_logprobs = None new_logprobs = None
new_token_ids = generated_token_ids new_token_ids = generated_token_ids
kv_transfer_params = None kv_transfer_params = None
...@@ -290,7 +293,10 @@ def zero_overhead_update_from_output(scheduler:Scheduler, ...@@ -290,7 +293,10 @@ def zero_overhead_update_from_output(scheduler:Scheduler,
num_cached_tokens=request.num_cached_tokens, num_cached_tokens=request.num_cached_tokens,
)) ))
if not stopped: if stopped:
if req_id in requsets_valid_token_len:
requsets_valid_token_len.pop(req_id)
else:
new_running.append(request) new_running.append(request)
scheduler.running = new_running scheduler.running = new_running
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment