Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
18903216
Unverified
Commit
18903216
authored
Nov 07, 2025
by
Benjamin Chislett
Committed by
GitHub
Nov 07, 2025
Browse files
[Bugfix] Fix and add tests for GptOss reasoning parser (#28000)
Signed-off-by:
Benjamin Chislett
<
bchislett@nvidia.com
>
parent
d0ceb38a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
151 additions
and
7 deletions
+151
-7
tests/reasoning/test_gptoss_reasoning_parser.py
tests/reasoning/test_gptoss_reasoning_parser.py
+127
-0
vllm/reasoning/gptoss_reasoning_parser.py
vllm/reasoning/gptoss_reasoning_parser.py
+24
-7
No files found.
tests/reasoning/test_gptoss_reasoning_parser.py
0 → 100644
View file @
18903216
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
transformers
import
AutoTokenizer
from
vllm.reasoning
import
ReasoningParser
from
vllm.reasoning.gptoss_reasoning_parser
import
GptOssReasoningParser
# Checkpoint whose tokenizer is loaded for every test in this module.
REASONING_MODEL_NAME = "openai/gpt-oss-120b"


@pytest.fixture(scope="module")
def gpt_oss_tokenizer():
    """Load the tokenizer for ``REASONING_MODEL_NAME`` once per test module."""
    tokenizer = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    return tokenizer
# Special-token markers used to assemble the sample model outputs below.
USER_MESSAGE_START = "<|start|>user<|message|>"
REASONING_SECTION_START = "<|end|><|start|>assistant<|channel|>analysis<|message|>"
ASSISTANT_CONTENT_START_PREFIX = "<|end|><|start|>assistant<|channel|>final"
ASSISTANT_CONTENT_START_SUFFIX = "<|message|>"
ASSISTANT_CONTENT_START = "".join(
    (ASSISTANT_CONTENT_START_PREFIX, ASSISTANT_CONTENT_START_SUFFIX)
)

# Shared leading text: an opened reasoning section with some reasoning in it.
_REASONING_HEAD = REASONING_SECTION_START + "This is reasoning"
# Reasoning followed by the "final" channel prefix and a JSON constraint, i.e.
# extra tokens sitting between "final" and "<|message|>".
_JSON_CONSTRAINED_HEAD = (
    _REASONING_HEAD + ASSISTANT_CONTENT_START_PREFIX + "<|constrain|> JSON "
)

# Each case pairs a raw output string with the value expected from
# ``is_reasoning_end`` once that string has been tokenized.
BASIC_CONTENT = {
    "output": _REASONING_HEAD + ASSISTANT_CONTENT_START + "This is the rest",
    "is_reasoning_end": True,
}

BASIC_REASONING_ONLY = {
    "output": _REASONING_HEAD + "<|end|>",
    "is_reasoning_end": False,
}

# NOTE: the two BASIC_NO_REASONING_* cases are defined here but are not
# listed in TEST_CASES, so they are never exercised by the parametrized test.
BASIC_NO_REASONING_NO_ASSISTANT = {
    "output": USER_MESSAGE_START + "This is a user message",
    "is_reasoning_end": False,
}

# Edge-case where the model omits the assistant tag entirely.
BASIC_NO_REASONING_ASSISTANT = {
    "output": USER_MESSAGE_START + "This is a user message<|end|><|channel|>final",
    "is_reasoning_end": True,
}

COMPLEX_CONTENT_INCOMPLETE_PREFIX_ONLY = {
    "output": _REASONING_HEAD + ASSISTANT_CONTENT_START_PREFIX,
    "is_reasoning_end": False,
}

COMPLEX_CONTENT_SUFFIX_ONLY = {
    "output": _REASONING_HEAD + ASSISTANT_CONTENT_START_SUFFIX,
    "is_reasoning_end": False,
}

COMPLEX_CONTENT_1_NO_SUFFIX = {
    "output": _JSON_CONSTRAINED_HEAD,
    "is_reasoning_end": False,
}

COMPLEX_CONTENT_1 = {
    "output": _JSON_CONSTRAINED_HEAD + ASSISTANT_CONTENT_START_SUFFIX,
    "is_reasoning_end": True,
}

COMPLEX_CONTENT_1_WITH_CONTENT = {
    "output": (
        _JSON_CONSTRAINED_HEAD + ASSISTANT_CONTENT_START_SUFFIX + "This is the rest"
    ),
    "is_reasoning_end": True,
}

COMPLEX_CONTENT_2 = {
    "output": (
        _REASONING_HEAD
        + ASSISTANT_CONTENT_START_PREFIX
        + "<|constrain|>ReplyAction "
        + ASSISTANT_CONTENT_START_SUFFIX
        + "This is the rest"
    ),
    "is_reasoning_end": True,
}

# Cases fed to the parametrized test, in execution order.
TEST_CASES = [
    BASIC_CONTENT,
    BASIC_REASONING_ONLY,
    COMPLEX_CONTENT_INCOMPLETE_PREFIX_ONLY,
    COMPLEX_CONTENT_SUFFIX_ONLY,
    COMPLEX_CONTENT_1_NO_SUFFIX,
    COMPLEX_CONTENT_1,
    COMPLEX_CONTENT_1_WITH_CONTENT,
    COMPLEX_CONTENT_2,
]
@pytest.mark.parametrize(
    "output, is_reasoning_end",
    [(case["output"], case["is_reasoning_end"]) for case in TEST_CASES],
)
def test_gptoss_is_reasoning_end(output, is_reasoning_end, gpt_oss_tokenizer):
    """Check ``is_reasoning_end`` against the expected flag for each case.

    The raw ``output`` string is tokenized, converted to token ids, and passed
    to a freshly constructed ``GptOssReasoningParser``.
    """
    tokens = gpt_oss_tokenizer.tokenize(output)
    parser: ReasoningParser = GptOssReasoningParser(gpt_oss_tokenizer)

    # Test is_reasoning_end
    token_ids = gpt_oss_tokenizer.convert_tokens_to_ids(tokens)
    observed = parser.is_reasoning_end(token_ids)
    assert is_reasoning_end == observed
vllm/reasoning/gptoss_reasoning_parser.py
View file @
18903216
...
...
@@ -67,17 +67,34 @@ class GptOssReasoningParser(ReasoningParser):
def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
    """Set up the token-id sequences that mark the end of reasoning.

    Args:
        tokenizer: Tokenizer forwarded to the base-class initializer.
        *args: Extra positional arguments forwarded to the base class.
        **kwargs: Extra keyword arguments forwarded to the base class.
    """
    super().__init__(tokenizer, *args, **kwargs)
    # NOTE(review): ``self.model_tokenizer`` is presumably set by the base
    # initializer above -- confirm against ReasoningParser.
    encode = self.model_tokenizer.encode

    # Exact end-of-reasoning marker, with nothing between "final" and
    # "<|message|>".
    self.reasoning_end_token_ids = encode(
        "<|start|>assistant<|channel|>final<|message|>"
    )

    # The model can output some special tokens between "final" and "<|message|>"
    # So we need to look for both sequences to determine the end of reasoning.
    self.reasoning_end_token_ids_prefix = encode("<|channel|>final")
    self.reasoning_end_token_ids_suffix = encode("<|message|>")
    # Upper bound on how many tokens may sit between the prefix and the suffix.
    self.reasoning_max_num_between_tokens = 20
def is_reasoning_end(self, input_ids: list[int]) -> bool:
    """Return whether ``input_ids`` contains the end-of-reasoning marker.

    The marker is the prefix sequence (``reasoning_end_token_ids_prefix``)
    followed by the suffix sequence (``reasoning_end_token_ids_suffix``),
    with fewer than ``reasoning_max_num_between_tokens`` tokens between
    them. Prefix occurrences are tried from the end of ``input_ids``
    backwards, so the most recent candidate is examined first.

    Args:
        input_ids: Token ids to scan.

    Returns:
        True if some prefix occurrence is followed closely enough by the
        suffix, otherwise False.

    Raises:
        AssertionError: If the configured prefix or suffix sequence is empty.
    """
    prefix_ids = self.reasoning_end_token_ids_prefix
    suffix_ids = self.reasoning_end_token_ids_suffix
    assert len(prefix_ids) > 0, "reasoning_end_token_ids_prefix is empty"
    assert len(suffix_ids) > 0, "reasoning_end_token_ids_suffix is empty"

    prefix_len = len(prefix_ids)
    suffix_len = len(suffix_ids)
    max_gap = self.reasoning_max_num_between_tokens
    # One past the last index at which the suffix could still start.
    suffix_search_end = len(input_ids) - suffix_len + 1

    # Check if the end sequence is present in the input_ids.
    # We search from the end of input_ids to find the last match.
    for prefix_start in range(len(input_ids) - prefix_len, -1, -1):
        if input_ids[prefix_start : prefix_start + prefix_len] != prefix_ids:
            continue
        # We have found the prefix, now we look for the suffix after the prefix,
        # allowing at most ``max_gap - 1`` tokens in between.
        suffix_start = prefix_start + prefix_len
        stop = min(suffix_start + max_gap, suffix_search_end)
        for j in range(suffix_start, stop):
            if input_ids[j : j + suffix_len] == suffix_ids:
                return True
    return False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment