"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "a92842454ca824ce6fcf356f31e3bf0daf53629b"
Commit 75b6adc9 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-lzg' into 'v0.9.2-dev'

fix bug in zero-overhead core

See merge request dcutoolkit/deeplearing/vllm!192
parents ed4b88d5 7d1b0c5e
...@@ -177,11 +177,11 @@ def zero_overhead_update_from_output(scheduler:Scheduler, ...@@ -177,11 +177,11 @@ def zero_overhead_update_from_output(scheduler:Scheduler,
# loop can be a performance bottleneck. We should do our best to avoid # loop can be a performance bottleneck. We should do our best to avoid
# expensive operations inside the loop. # expensive operations inside the loop.
for request in scheduler.running: for request in scheduler.running:
req_id = request.request_id
if request.is_finished(): if request.is_finished():
if req_id in requsets_valid_token_len: if req_id in requsets_valid_token_len:
requsets_valid_token_len.pop(req_id) requsets_valid_token_len.pop(req_id)
continue continue
req_id = request.request_id
num_tokens_scheduled = num_scheduled_tokens.get(req_id, 0) num_tokens_scheduled = num_scheduled_tokens.get(req_id, 0)
if num_tokens_scheduled == 0: if num_tokens_scheduled == 0:
# The request was not scheduled in this step. # The request was not scheduled in this step.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment