seq.data._effective_length+=1

18b9f67c · guanyu1 · 6d0996e9 · 18b9f67c · 18b9f67c
Commit 18b9f67c authored Mar 24, 2025 by guanyu1
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 5 deletions

benchmarks/benchmark_throughput.py benchmarks/benchmark_throughput.py +1 -1

vllm/engine/llm_engine.py vllm/engine/llm_engine.py +4 -4

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -179,7 +179,7 @@ def run_vllm(
    sampling_params: List[SamplingParams] = []
    for request in requests:
        prompts.append(
-            TextPrompt(prompt="helloworld",
+            TextPrompt(prompt="helloword",
                       multi_modal_data=request.multi_modal_data))
        sampling_params.append(
            SamplingParams(

--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1514,9 +1514,9 @@ class LLMEngine:
                for token_id, seq_id in zip(sample_out_list, sample_out_ids):
                    if seq.seq_id == seq_id:
                        sample.output_token = token_id[0]
+                        seq.data._effective_length+=1
                        seq.fix_last_token_id(sample.output_token)
-                        self.fix_process_model_output(ctx_output_queue,ctx_request_outputs,
-            ctx_multi_step_stream_outputs)
+                        self.fix_process_model_output(ctx_output_queue,ctx_request_outputs,ctx_multi_step_stream_outputs)
                        break

    def _advance_to_next_step(
@@ -1613,8 +1613,8 @@ class LLMEngine:
                last_outputs_ids = last_outputs_ids, 
                last_outputs_sample = last_outputs_tensor)
            if allow_async_output_proc:
-                execute_model_req.async_callback = self.async_callbacks[
-                    virtual_engine]
+                    execute_model_req.async_callback = self.async_callbacks[
+                        virtual_engine]

            #profile.ProfRangeAutoPush('model_executor')
            outputs = self.model_executor.execute_model(