[Bugfix] Pass json-schema to GuidedDecodingParams and make test stronger (#9530)

5b59fe0f · Chen Zhang · GitHub · 8e3e7f27 · 5b59fe0f · 5b59fe0f
Unverified commit 5b59fe0f, authored Oct 19, 2024 by Chen Zhang; committed by GitHub on Oct 20, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 29 additions and 9 deletions

tests/entrypoints/openai/test_chat.py +18 -4

vllm/entrypoints/openai/protocol.py +11 -5

No files found.
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -851,14 +851,28 @@ async def test_response_format_json_object(client: openai.AsyncOpenAI):

 @pytest.mark.asyncio
 async def test_response_format_json_schema(client: openai.AsyncOpenAI):
+    prompt = 'what is 1+1? The format is "result": 2'
+    # Check that this prompt cannot lead to a valid JSON without json_schema
    for _ in range(2):
        resp = await client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{
-                "role":
-                "user",
-                "content": ('what is 1+1? please respond with a JSON object, '
-                            'the format is {"result": 2}')
+                "role": "user",
+                "content": prompt
+            }],
+        )
+        content = resp.choices[0].message.content
+        assert content is not None
+        with pytest.raises((json.JSONDecodeError, AssertionError)):
+            loaded = json.loads(content)
+            assert loaded == {"result": 2}, loaded
+
+    for _ in range(2):
+        resp = await client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=[{
+                "role": "user",
+                "content": prompt
            }],
            response_format={
                "type": "json_schema",

--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -314,9 +314,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
            prompt_logprobs = self.top_logprobs

        guided_json_object = None
-        if (self.response_format is not None
-                and self.response_format.type == "json_object"):
-            guided_json_object = True
+        if self.response_format is not None:
+            if self.response_format.type == "json_object":
+                guided_json_object = True
+            elif self.response_format.type == "json_schema":
+                json_schema = self.response_format.json_schema
+                assert json_schema is not None
+                self.guided_json = json_schema.json_schema
+                if self.guided_decoding_backend is None:
+                    self.guided_decoding_backend = "lm-format-enforcer"

        guided_decoding = GuidedDecodingParams.from_optional(
            json=self._get_guided_json_from_tool() or self.guided_json,
@@ -537,8 +543,8 @@ class CompletionRequest(OpenAIBaseModel):
        default=None,
        description=
        ("Similar to chat completion, this parameter specifies the format of "
-         "output. Only {'type': 'json_object'} or {'type': 'text' } is "
-         "supported."),
+         "output. Only {'type': 'json_object'}, {'type': 'json_schema'} or "
+         "{'type': 'text' } is supported."),
    )
    guided_json: Optional[Union[str, dict, BaseModel]] = Field(
        default=None,