[Bugfix] Backport request id validation to v0 (#11036)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>

[Bugfix] Backport request id validation to v0 (#11036)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
9b9cef31 · Joe Runde · GitHub · d05f8867 · 9b9cef31 · 9b9cef31
Unverified commit 9b9cef31, authored Dec 10, 2024 by Joe Runde; committed by GitHub on Dec 10, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

vllm/engine/multiprocessing/client.py +4 -0

vllm/v1/engine/async_llm.py +1 -1

No files found.
--- a/vllm/engine/multiprocessing/client.py
+++ b/vllm/engine/multiprocessing/client.py
@@ -576,6 +576,10 @@ class MQLLMEngineClient(EngineClient):
        if self._errored_with is not None:
            raise ENGINE_DEAD_ERROR(self._errored_with)
+        # Ensure the request id is unique among running requests
+        if request_id in self.output_queues:
+            raise ValueError(f"Request {request_id} already exists")
        # Constructing guided decoding logits processors is expensive, so we do
        # it here to avoid contending with cpu resources and the GIL on the
        # backend process.

--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -152,7 +152,7 @@ class AsyncLLM(EngineClient):
        """Add new request to the AsyncLLM."""
        if self.detokenizer.is_request_active(request_id):
-            raise KeyError(f"Request {request_id} already exists.")
+            raise ValueError(f"Request {request_id} already exists.")
        # 1) Create a new AsyncStream for the request.
        stream = self._add_request_to_streams(request_id)