Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f04d5226
Unverified
Commit f04d5226 authored Mar 16, 2026 by Flora Feng
Committed by GitHub Mar 17, 2026
Browse files
[CI] Fix flaky tool_use chat completion tests with deterministic seed (#37027)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
parent 0a0a1a19
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing 4 changed files with 19 additions and 0 deletions
+19
-0
tests/tool_use/test_chat_completions.py
tests/tool_use/test_chat_completions.py
+5
-0
tests/tool_use/test_parallel_tool_calls.py
tests/tool_use/test_parallel_tool_calls.py
+7
-0
tests/tool_use/test_tool_calls.py
tests/tool_use/test_tool_calls.py
+5
-0
tests/tool_use/utils.py
tests/tool_use/utils.py
+2
-0
No files found.
tests/tool_use/test_chat_completions.py
View file @
f04d5226
...
@@ -6,6 +6,7 @@ import pytest
...
@@ -6,6 +6,7 @@ import pytest
from
.utils
import
(
from
.utils
import
(
MESSAGES_WITHOUT_TOOLS
,
MESSAGES_WITHOUT_TOOLS
,
SEED
,
WEATHER_TOOL
,
WEATHER_TOOL
,
ServerConfig
,
ServerConfig
,
ensure_system_prompt
,
ensure_system_prompt
,
...
@@ -27,6 +28,7 @@ async def test_chat_completion_without_tools(
...
@@ -27,6 +28,7 @@ async def test_chat_completion_without_tools(
max_completion_tokens
=
150
,
max_completion_tokens
=
150
,
model
=
model_name
,
model
=
model_name
,
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
)
)
choice
=
chat_completion
.
choices
[
0
]
choice
=
chat_completion
.
choices
[
0
]
stop_reason
=
chat_completion
.
choices
[
0
].
finish_reason
stop_reason
=
chat_completion
.
choices
[
0
].
finish_reason
...
@@ -47,6 +49,7 @@ async def test_chat_completion_without_tools(
...
@@ -47,6 +49,7 @@ async def test_chat_completion_without_tools(
max_completion_tokens
=
150
,
max_completion_tokens
=
150
,
model
=
model_name
,
model
=
model_name
,
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
stream
=
True
,
stream
=
True
,
)
)
chunks
:
list
[
str
]
=
[]
chunks
:
list
[
str
]
=
[]
...
@@ -97,6 +100,7 @@ async def test_chat_completion_with_tools(
...
@@ -97,6 +100,7 @@ async def test_chat_completion_with_tools(
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
],
tools
=
[
WEATHER_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
)
)
choice
=
chat_completion
.
choices
[
0
]
choice
=
chat_completion
.
choices
[
0
]
stop_reason
=
chat_completion
.
choices
[
0
].
finish_reason
stop_reason
=
chat_completion
.
choices
[
0
].
finish_reason
...
@@ -118,6 +122,7 @@ async def test_chat_completion_with_tools(
...
@@ -118,6 +122,7 @@ async def test_chat_completion_with_tools(
model
=
model_name
,
model
=
model_name
,
logprobs
=
False
,
logprobs
=
False
,
tools
=
[
WEATHER_TOOL
],
tools
=
[
WEATHER_TOOL
],
seed
=
SEED
,
stream
=
True
,
stream
=
True
,
)
)
...
...
tests/tool_use/test_parallel_tool_calls.py
View file @
f04d5226
...
@@ -10,6 +10,7 @@ from .utils import (
...
@@ -10,6 +10,7 @@ from .utils import (
MESSAGES_ASKING_FOR_PARALLEL_TOOLS
,
MESSAGES_ASKING_FOR_PARALLEL_TOOLS
,
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE
,
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE
,
SEARCH_TOOL
,
SEARCH_TOOL
,
SEED
,
WEATHER_TOOL
,
WEATHER_TOOL
,
ServerConfig
,
ServerConfig
,
)
)
...
@@ -39,6 +40,7 @@ async def test_parallel_tool_calls(
...
@@ -39,6 +40,7 @@ async def test_parallel_tool_calls(
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
)
)
choice
=
chat_completion
.
choices
[
0
]
choice
=
chat_completion
.
choices
[
0
]
...
@@ -76,6 +78,7 @@ async def test_parallel_tool_calls(
...
@@ -76,6 +78,7 @@ async def test_parallel_tool_calls(
max_completion_tokens
=
200
,
max_completion_tokens
=
200
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
stream
=
True
,
stream
=
True
,
)
)
...
@@ -166,6 +169,7 @@ async def test_parallel_tool_calls_with_results(
...
@@ -166,6 +169,7 @@ async def test_parallel_tool_calls_with_results(
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
)
)
choice
=
chat_completion
.
choices
[
0
]
choice
=
chat_completion
.
choices
[
0
]
...
@@ -184,6 +188,7 @@ async def test_parallel_tool_calls_with_results(
...
@@ -184,6 +188,7 @@ async def test_parallel_tool_calls_with_results(
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
stream
=
True
,
stream
=
True
,
)
)
...
@@ -229,6 +234,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
...
@@ -229,6 +234,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
parallel_tool_calls
=
False
,
parallel_tool_calls
=
False
,
)
)
...
@@ -247,6 +253,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
...
@@ -247,6 +253,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
max_completion_tokens
=
200
,
max_completion_tokens
=
200
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
parallel_tool_calls
=
False
,
parallel_tool_calls
=
False
,
stream
=
True
,
stream
=
True
,
)
)
...
...
tests/tool_use/test_tool_calls.py
View file @
f04d5226
...
@@ -10,6 +10,7 @@ from .utils import (
...
@@ -10,6 +10,7 @@ from .utils import (
MESSAGES_ASKING_FOR_TOOLS
,
MESSAGES_ASKING_FOR_TOOLS
,
MESSAGES_WITH_TOOL_RESPONSE
,
MESSAGES_WITH_TOOL_RESPONSE
,
SEARCH_TOOL
,
SEARCH_TOOL
,
SEED
,
WEATHER_TOOL
,
WEATHER_TOOL
,
)
)
...
@@ -27,6 +28,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
...
@@ -27,6 +28,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
)
)
choice
=
chat_completion
.
choices
[
0
]
choice
=
chat_completion
.
choices
[
0
]
...
@@ -71,6 +73,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
...
@@ -71,6 +73,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
max_completion_tokens
=
100
,
max_completion_tokens
=
100
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
stream
=
True
,
stream
=
True
,
)
)
...
@@ -154,6 +157,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
...
@@ -154,6 +157,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
)
)
choice
=
chat_completion
.
choices
[
0
]
choice
=
chat_completion
.
choices
[
0
]
...
@@ -171,6 +175,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
...
@@ -171,6 +175,7 @@ async def test_tool_call_with_results(client: openai.AsyncOpenAI):
model
=
model_name
,
model
=
model_name
,
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
tools
=
[
WEATHER_TOOL
,
SEARCH_TOOL
],
logprobs
=
False
,
logprobs
=
False
,
seed
=
SEED
,
stream
=
True
,
stream
=
True
,
)
)
...
...
tests/tool_use/utils.py
View file @
f04d5226
...
@@ -42,6 +42,8 @@ def ensure_system_prompt(
...
@@ -42,6 +42,8 @@ def ensure_system_prompt(
# universal args for all models go here. also good if you need to test locally
# universal args for all models go here. also good if you need to test locally
# and change type or KV cache quantization or something.
# and change type or KV cache quantization or something.
SEED = 42
ARGS
:
list
[
str
]
=
[
ARGS
:
list
[
str
]
=
[
"--enable-auto-tool-choice"
,
"--enable-auto-tool-choice"
,
"--max-model-len"
,
"--max-model-len"
,
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment