Responses harmony system message structured (#34268)

Signed-off-by: Adam Binford <adamq43@gmail.com>

Responses harmony system message structured (#34268)
Signed-off-by: Adam Binford <adamq43@gmail.com>
1b875656 · Adam Binford · GitHub · 275e0d2a · 1b875656 · 1b875656
Unverified Commit 1b875656 authored Feb 11, 2026 by Adam Binford Committed by GitHub Feb 11, 2026
Showing with 43 additions and 6 deletions

tests/entrypoints/openai/responses/test_harmony.py tests/entrypoints/openai/responses/test_harmony.py +29 -4

vllm/entrypoints/openai/responses/serving.py vllm/entrypoints/openai/responses/serving.py +14 -2

No files found.
--- a/tests/entrypoints/openai/responses/test_harmony.py
+++ b/tests/entrypoints/openai/responses/test_harmony.py
@@ -1302,16 +1302,17 @@ async def test_system_prompt_override(client: OpenAI, model_name: str):
        # Message structure may vary, skip this specific check
        pass
+    custom_system_prompt_2 = (
+        "You are a helpful assistant that always responds in exactly 5 words."
+    )
    # Test 3: Test with different custom system prompt
    response_2 = await client.responses.create(
        model=model_name,
        input=[
            {
                "role": "system",
-                "content": (
+                "content": custom_system_prompt_2,
-                    "You are a helpful assistant that always "
-                    "responds in exactly 5 words."
-                ),
            },
            {"role": "user", "content": "What is the weather like?"},
        ],
@@ -1328,3 +1329,27 @@ async def test_system_prompt_override(client: OpenAI, model_name: str):
    assert 3 <= word_count <= 8, (
        f"Expected around 5 words, got {word_count} words: {response_2.output_text}"
    )
+    # Test 4: Test with structured content
+    response_3 = await client.responses.create(
+        model=model_name,
+        input=[
+            {
+                "role": "system",
+                "content": [{"type": "input_text", "text": custom_system_prompt_2}],
+            },
+            {"role": "user", "content": "What is the weather like?"},
+        ],
+        temperature=0.0,
+    )
+    assert response_3 is not None
+    assert response_3.status == "completed"
+    assert response_3.output_text is not None
+    # Count words in response (approximately, allowing for punctuation)
+    word_count = len(response_3.output_text.split())
+    # Allow some flexibility (3-8 words) since the model might not be perfectly precise
+    assert 3 <= word_count <= 8, (
+        f"Expected around 5 words, got {word_count} words: {response_3.output_text}"
+    )
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -980,7 +980,9 @@ class OpenAIServingResponses(OpenAIServing):
            output_items.extend(last_items)
        return output_items
-    def _extract_system_message_from_request(self, request) -> str | None:
+    def _extract_system_message_from_request(
+        self, request: ResponsesRequest
+    ) -> str | None:
        system_msg = None
        if not isinstance(request.input, str):
            for response_msg in request.input:
@@ -988,7 +990,17 @@ class OpenAIServingResponses(OpenAIServing):
                    isinstance(response_msg, dict)
                    and response_msg.get("role") == "system"
                ):
-                    system_msg = response_msg.get("content")
+                    content = response_msg.get("content")
+                    if isinstance(content, str):
+                        system_msg = content
+                    elif isinstance(content, list):
+                        for param in content:
+                            if (
+                                isinstance(param, dict)
+                                and param.get("type") == "input_text"
+                            ):
+                                system_msg = param.get("text")
+                                break
                    break
        return system_msg