[Bugfix] Fix Responses API instructions leaking through previous_response_id (#37727)

Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Co-authored-by: Chauncey <chaunceyjiang@gmail.com>

[Bugfix] Fix Responses API instructions leaking through previous_response_id (#37727)
Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Co-authored-by: Chauncey <chaunceyjiang@gmail.com>
200a727e · Yufeng He · GitHub · edbc1abd · 200a727e · 200a727e
Unverified commit 200a727e, authored Apr 13, 2026 by Yufeng He; committed by GitHub on Apr 13, 2026.
Showing 2 changed files with 73 additions and 2 deletions

tests/entrypoints/openai/responses/test_responses_utils.py +69 -0

vllm/entrypoints/openai/responses/utils.py +4 -2

No files found.
--- a/tests/entrypoints/openai/responses/test_responses_utils.py
+++ b/tests/entrypoints/openai/responses/test_responses_utils.py
@@ -22,6 +22,7 @@ from vllm.entrypoints.openai.responses.utils import (
    _construct_single_message_from_response_item,
    _maybe_combine_reasoning_and_tool_call,
    construct_chat_messages_with_tool_call,
+    construct_input_messages,
    convert_tool_responses_to_completions_format,
    should_continue_final_message,
 )
@@ -738,3 +739,71 @@ class TestMaybeCombineReasoningAndToolCall:
        result = _maybe_combine_reasoning_and_tool_call(item, messages)
        assert result is None
+class TestConstructInputMessagesInstructionsLeak:
+    """Regression tests for #37697: instructions from a prior response
+    should NOT leak through previous_response_id."""
+    def test_old_instructions_stripped_from_prev_msg(self):
+        """System message in prev_msg must be dropped so the new request's
+        instructions are the only system message in the conversation."""
+        prev = [
+            {"role": "system", "content": "old instructions"},
+            {"role": "user", "content": "What is 2+2?"},
+            {"role": "assistant", "content": "4"},
+        ]
+        msgs = construct_input_messages(
+            request_instructions="new instructions",
+            request_input="What is 3+3?",
+            prev_msg=prev,
+        )
+        system_msgs = [m for m in msgs if m.get("role") == "system"]
+        assert len(system_msgs) == 1
+        assert system_msgs[0]["content"] == "new instructions"
+    def test_no_instructions_in_new_request(self):
+        """If the new request has no instructions, old ones should still
+        be stripped -- they must not carry over."""
+        prev = [
+            {"role": "system", "content": "old instructions"},
+            {"role": "user", "content": "Hi"},
+            {"role": "assistant", "content": "Hello"},
+        ]
+        msgs = construct_input_messages(
+            request_instructions=None,
+            request_input="What is 3+3?",
+            prev_msg=prev,
+        )
+        system_msgs = [m for m in msgs if m.get("role") == "system"]
+        assert len(system_msgs) == 0
+    def test_non_system_messages_preserved(self):
+        """User/assistant messages from prev_msg must remain intact."""
+        prev = [
+            {"role": "system", "content": "old instructions"},
+            {"role": "user", "content": "Hi"},
+            {"role": "assistant", "content": "Hello"},
+        ]
+        msgs = construct_input_messages(
+            request_instructions="new instructions",
+            request_input="Follow up",
+            prev_msg=prev,
+        )
+        roles = [m["role"] for m in msgs]
+        assert roles == ["system", "user", "assistant", "user"]
+        assert msgs[0]["content"] == "new instructions"
+        assert msgs[1]["content"] == "Hi"
+        assert msgs[2]["content"] == "Hello"
+        assert msgs[3]["content"] == "Follow up"
+    def test_no_prev_msg(self):
+        """Baseline: when there's no prev_msg, instructions work normally."""
+        msgs = construct_input_messages(
+            request_instructions="be helpful",
+            request_input="hello",
+            prev_msg=None,
+        )
+        assert len(msgs) == 2
+        assert msgs[0] == {"role": "system", "content": "be helpful"}
+        assert msgs[1] == {"role": "user", "content": "hello"}
--- a/vllm/entrypoints/openai/responses/utils.py
+++ b/vllm/entrypoints/openai/responses/utils.py
@@ -94,8 +94,10 @@ def construct_input_messages(
    # Prepend the conversation history.
    if prev_msg is not None:
-        # Add the previous messages.
-        messages.extend(prev_msg)
+        # Filter out system messages from previous conversation -- per the
+        # OpenAI spec, instructions should NOT carry over across responses.
+        # The current request's instructions (if any) were already added above.
+        messages.extend(m for m in prev_msg if m.get("role") != "system")
    if prev_response_output is not None:
        # Add the previous output.
        for output_item in prev_response_output: