"tests/tensorizer_loader/test_tensorizer.py" did not exist on "989ae2538df211ca3a31f77ac8e106c5c97c6e53"
Unverified commit f04d5226, authored by Flora Feng, committed by GitHub
Browse files

[CI] Fix flaky tool_use chat completion tests with deterministic seed (#37027)


Signed-off-by: sfeng33 <4florafeng@gmail.com>
parent 0a0a1a19
......@@ -6,6 +6,7 @@ import pytest
from .utils import (
MESSAGES_WITHOUT_TOOLS,
SEED,
WEATHER_TOOL,
ServerConfig,
ensure_system_prompt,
......@@ -27,6 +28,7 @@ async def test_chat_completion_without_tools(
max_completion_tokens=150,
model=model_name,
logprobs=False,
seed=SEED,
)
choice = chat_completion.choices[0]
stop_reason = chat_completion.choices[0].finish_reason
......@@ -47,6 +49,7 @@ async def test_chat_completion_without_tools(
max_completion_tokens=150,
model=model_name,
logprobs=False,
seed=SEED,
stream=True,
)
chunks: list[str] = []
......@@ -97,6 +100,7 @@ async def test_chat_completion_with_tools(
model=model_name,
tools=[WEATHER_TOOL],
logprobs=False,
seed=SEED,
)
choice = chat_completion.choices[0]
stop_reason = chat_completion.choices[0].finish_reason
......@@ -118,6 +122,7 @@ async def test_chat_completion_with_tools(
model=model_name,
logprobs=False,
tools=[WEATHER_TOOL],
seed=SEED,
stream=True,
)
......
......@@ -10,6 +10,7 @@ from .utils import (
MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
SEARCH_TOOL,
SEED,
WEATHER_TOOL,
ServerConfig,
)
......@@ -39,6 +40,7 @@ async def test_parallel_tool_calls(
model=model_name,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
)
choice = chat_completion.choices[0]
......@@ -76,6 +78,7 @@ async def test_parallel_tool_calls(
max_completion_tokens=200,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
stream=True,
)
......@@ -166,6 +169,7 @@ async def test_parallel_tool_calls_with_results(
model=model_name,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
)
choice = chat_completion.choices[0]
......@@ -184,6 +188,7 @@ async def test_parallel_tool_calls_with_results(
model=model_name,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
stream=True,
)
......@@ -229,6 +234,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
model=model_name,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
parallel_tool_calls=False,
)
......@@ -247,6 +253,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
max_completion_tokens=200,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
parallel_tool_calls=False,
stream=True,
)
......
......@@ -10,6 +10,7 @@ from .utils import (
MESSAGES_ASKING_FOR_TOOLS,
MESSAGES_WITH_TOOL_RESPONSE,
SEARCH_TOOL,
SEED,
WEATHER_TOOL,
)
......@@ -27,6 +28,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
model=model_name,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
)
choice = chat_completion.choices[0]
......@@ -71,6 +73,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
max_completion_tokens=100,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
stream=True,
)
......@@ -154,6 +157,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
model=model_name,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
)
choice = chat_completion.choices[0]
......@@ -171,6 +175,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
model=model_name,
tools=[WEATHER_TOOL, SEARCH_TOOL],
logprobs=False,
seed=SEED,
stream=True,
)
......
......@@ -42,6 +42,8 @@ def ensure_system_prompt(
# universal args for all models go here. also good if you need to test locally
# and change type or KV cache quantization or something.
SEED = 42
ARGS: list[str] = [
"--enable-auto-tool-choice",
"--max-model-len",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment