[CI] Expand OpenAI test_chat.py guided decoding tests (#11048)

Signed-off-by: mgoin <michael@neuralmagic.com>

[CI] Expand OpenAI test_chat.py guided decoding tests (#11048)
Signed-off-by: mgoin <michael@neuralmagic.com>
63afbe92 · Michael Goin · GitHub · 8cef6e02 · 63afbe92
Commit 63afbe92 (signature unverified), authored by Michael Goin on Dec 23, 2024; committed by GitHub on Dec 23, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 17 deletions

tests/entrypoints/openai/test_chat.py +12 -17

No files found.
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -17,6 +17,8 @@ from .test_completion import zephyr_lora_files  # noqa: F401
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
 @pytest.fixture(scope="module")
 def server(zephyr_lora_files, zephyr_lora_added_tokens_files):  # noqa: F811
@@ -464,8 +466,7 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
 # will fail on the second `guided_decoding_backend` even when I swap their order
 # (ref: https://github.com/vllm-project/vllm/pull/5526#issuecomment-2173772256)
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend",
-                         ["outlines", "lm-format-enforcer"])
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_choice_chat(client: openai.AsyncOpenAI,
                                  guided_decoding_backend: str,
                                  sample_guided_choice):
@@ -506,8 +507,7 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend",
-                         ["outlines", "lm-format-enforcer"])
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_json_chat(client: openai.AsyncOpenAI,
                                guided_decoding_backend: str,
                                sample_json_schema):
@@ -554,8 +554,7 @@ async def test_guided_json_chat(client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend",
-                         ["outlines", "lm-format-enforcer"])
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_regex_chat(client: openai.AsyncOpenAI,
                                 guided_decoding_backend: str, sample_regex):
    messages = [{
@@ -613,8 +612,7 @@ async def test_guided_decoding_type_error(client: openai.AsyncOpenAI):
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend",
-                         ["outlines", "lm-format-enforcer"])
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
                                           guided_decoding_backend: str,
                                           sample_guided_choice):
@@ -646,8 +644,7 @@ async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend",
-                         ["outlines", "lm-format-enforcer"])
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_named_tool_use(client: openai.AsyncOpenAI,
                              guided_decoding_backend: str,
                              sample_json_schema):
@@ -681,7 +678,8 @@ async def test_named_tool_use(client: openai.AsyncOpenAI,
            "function": {
                "name": "dummy_function_name"
            }
-        })
+        },
+        extra_body=dict(guided_decoding_backend=guided_decoding_backend))
    message = chat_completion.choices[0].message
    assert len(message.content) == 0
    json_string = message.tool_calls[0].function.arguments
@@ -716,6 +714,7 @@ async def test_named_tool_use(client: openai.AsyncOpenAI,
                "name": "dummy_function_name"
            }
        },
+        extra_body=dict(guided_decoding_backend=guided_decoding_backend),
        stream=True)
    output = []
@@ -738,10 +737,8 @@ async def test_named_tool_use(client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend", ["outlines"])
-async def test_required_tool_use_not_yet_supported(
-        client: openai.AsyncOpenAI, guided_decoding_backend: str,
-        sample_json_schema):
+async def test_required_tool_use_not_yet_supported(client: openai.AsyncOpenAI,
+                                                   sample_json_schema):
    messages = [{
        "role": "system",
        "content": "you are a helpful assistant"
@@ -785,9 +782,7 @@ async def test_required_tool_use_not_yet_supported(
 @pytest.mark.asyncio
-@pytest.mark.parametrize("guided_decoding_backend", ["outlines"])
 async def test_inconsistent_tool_choice_and_tools(client: openai.AsyncOpenAI,
-                                                  guided_decoding_backend: str,
                                                  sample_json_schema):
    messages = [{
        "role": "system",