fix pending error in zero overhead

e6bbec9e · lizhigong · 2825dacd · e6bbec9e · e6bbec9e
Commit e6bbec9e authored Mar 28, 2025 by lizhigong
Show whitespace changes
Inline Side-by-side

Showing with 12 additions and 7 deletions

benchmarks/benchmark_serving.py benchmarks/benchmark_serving.py +2 -0

vllm/sequence.py vllm/sequence.py +10 -7

No files found.
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -570,6 +570,8 @@ async def benchmark(
    else:
        print("Initial test run completed. Starting main benchmark run...")

+    time.sleep(0.1) # ZERO_OVERHEAD: sleep to wait for the last warmup step to finish
+    
    if profile:
        print("Starting profiler...")
        profile_input = RequestFuncInput(model=model_id,

--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -315,6 +315,7 @@ class SequenceData(msgspec.Struct,
        effect_offset = self._effective_length - len(self.output_token_ids)
        if effect_offset < 0:
            self._output_token_ids[effect_offset] = token_id
+            if len(self._new_appended_tokens) >= effect_offset * -1:
                self._new_appended_tokens[effect_offset] = token_id
            self._cached_all_token_ids[effect_offset] = token_id
            self._effective_length += 1
@@ -848,6 +849,7 @@ class SequenceGroup:
    def set_last_token_time(self, now: float) -> None:
        """Sets the last token time for Request level timings."""
        # If still in prefill phase, assertion fails.
+        if not self.seqs[0].zero_overhead:
            assert not self.is_prefill(), (
                "seq_group.set_last_token_time() should not be called "
                "if the seq_group is in prefill phase.")
@@ -856,6 +858,7 @@ class SequenceGroup:

    def get_last_token_latency(self) -> float:
        """Returns the latency of the last token."""
+        if not self.seqs[0].zero_overhead:
            assert not self.is_prefill(), (
                "seq_group.get_last_token_latency() should not be called "
                "if the seq_group is in prefill phase.")