Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
27a009bb
"model.properties" did not exist on "37fc35b35692efeeb1f47d5fbde08d700efadeca"
Unverified
Commit
27a009bb
authored
Apr 15, 2025
by
Chang Su
Committed by
GitHub
Apr 15, 2025
Browse files
Fix ignore_eos parameter when loading a chat template (#5264)
parent
8ec0bb7d
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
76 additions
and
1 deletion
+76
-1
python/sglang/srt/openai_api/adapter.py
python/sglang/srt/openai_api/adapter.py
+2
-1
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+74
-0
No files found.
python/sglang/srt/openai_api/adapter.py
View file @
27a009bb
...
...
@@ -995,7 +995,8 @@ def v1_chat_generate_request(
image_data
=
conv
.
image_data
audio_data
=
conv
.
audio_data
modalities
=
conv
.
modalities
stop
=
conv
.
stop_str
or
[]
stop
=
conv
.
stop_str
or
[]
if
not
request
.
ignore_eos
else
[]
if
request
.
stop
:
if
isinstance
(
request
.
stop
,
str
):
stop
.
append
(
request
.
stop
)
...
...
test/srt/test_openai_server.py
View file @
27a009bb
...
...
@@ -676,5 +676,79 @@ class TestOpenAIEmbedding(CustomTestCase):
self
.
assertTrue
(
len
(
response
.
data
[
1
].
embedding
)
>
0
)
class TestOpenAIServerIgnoreEOS(CustomTestCase):
    """End-to-end check that the ``ignore_eos`` sampling flag is honored
    by the OpenAI-compatible chat endpoint when a chat template is loaded.
    """

    @classmethod
    def setUpClass(cls):
        # Launch the server with an explicit chat template so the
        # template-loading request path is exercised.
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=["--chat-template=llama_3_vision"],
        )
        cls.base_url += "/v1"
        cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)

    @classmethod
    def tearDownClass(cls):
        # Tear down the launched server process and any children.
        kill_process_tree(cls.process.pid)

    def _create_completion(self, client, max_tokens, ignore_eos):
        # Issue one deterministic (temperature=0) chat completion with the
        # requested ignore_eos flag passed through extra_body.
        return client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Count from 1 to 20."},
            ],
            temperature=0,
            max_tokens=max_tokens,
            extra_body={"ignore_eos": ignore_eos},
        )

    def _token_count(self, response):
        # Number of tokens in the first choice's message content.
        return len(self.tokenizer.encode(response.choices[0].message.content))

    def test_ignore_eos(self):
        """
        Test that ignore_eos=True allows generation to continue beyond EOS token
        and reach the max_tokens limit.
        """
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        max_tokens = 200

        response_default = self._create_completion(
            client, max_tokens, ignore_eos=False
        )
        response_ignore_eos = self._create_completion(
            client, max_tokens, ignore_eos=True
        )

        default_tokens = self._token_count(response_default)
        ignore_eos_tokens = self._token_count(response_ignore_eos)

        # Check if ignore_eos resulted in more tokens or exactly max_tokens
        # The ignore_eos response should either:
        # 1. Have more tokens than the default response (if default stopped at EOS before max_tokens)
        # 2. Have exactly max_tokens (if it reached the max_tokens limit)
        self.assertTrue(
            ignore_eos_tokens > default_tokens or ignore_eos_tokens >= max_tokens,
            f"ignore_eos did not generate more tokens: {ignore_eos_tokens} vs {default_tokens}",
        )
        self.assertEqual(
            response_ignore_eos.choices[0].finish_reason,
            "length",
            f"Expected finish_reason='length' for ignore_eos=True, got {response_ignore_eos.choices[0].finish_reason}",
        )
if __name__ == "__main__":
    # Discover and run all test cases in this module when executed directly.
    unittest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment