Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1b875656
Unverified
Commit
1b875656
authored
Feb 11, 2026
by
Adam Binford
Committed by
GitHub
Feb 11, 2026
Browse files
Responses harmony system message structured (#34268)
Signed-off-by:
Adam Binford
<
adamq43@gmail.com
>
parent
275e0d2a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
43 additions
and
6 deletions
+43
-6
tests/entrypoints/openai/responses/test_harmony.py
tests/entrypoints/openai/responses/test_harmony.py
+29
-4
vllm/entrypoints/openai/responses/serving.py
vllm/entrypoints/openai/responses/serving.py
+14
-2
No files found.
tests/entrypoints/openai/responses/test_harmony.py
View file @
1b875656
...
@@ -1302,16 +1302,17 @@ async def test_system_prompt_override(client: OpenAI, model_name: str):
...
@@ -1302,16 +1302,17 @@ async def test_system_prompt_override(client: OpenAI, model_name: str):
# Message structure may vary, skip this specific check
# Message structure may vary, skip this specific check
pass
pass
custom_system_prompt_2
=
(
"You are a helpful assistant that always responds in exactly 5 words."
)
# Test 3: Test with different custom system prompt
# Test 3: Test with different custom system prompt
response_2
=
await
client
.
responses
.
create
(
response_2
=
await
client
.
responses
.
create
(
model
=
model_name
,
model
=
model_name
,
input
=
[
input
=
[
{
{
"role"
:
"system"
,
"role"
:
"system"
,
"content"
:
(
"content"
:
custom_system_prompt_2
,
"You are a helpful assistant that always "
"responds in exactly 5 words."
),
},
},
{
"role"
:
"user"
,
"content"
:
"What is the weather like?"
},
{
"role"
:
"user"
,
"content"
:
"What is the weather like?"
},
],
],
...
@@ -1328,3 +1329,27 @@ async def test_system_prompt_override(client: OpenAI, model_name: str):
...
@@ -1328,3 +1329,27 @@ async def test_system_prompt_override(client: OpenAI, model_name: str):
assert
3
<=
word_count
<=
8
,
(
assert
3
<=
word_count
<=
8
,
(
f
"Expected around 5 words, got
{
word_count
}
words:
{
response_2
.
output_text
}
"
f
"Expected around 5 words, got
{
word_count
}
words:
{
response_2
.
output_text
}
"
)
)
# Test 4: Test with structured content
response_3
=
await
client
.
responses
.
create
(
model
=
model_name
,
input
=
[
{
"role"
:
"system"
,
"content"
:
[{
"type"
:
"input_text"
,
"text"
:
custom_system_prompt_2
}],
},
{
"role"
:
"user"
,
"content"
:
"What is the weather like?"
},
],
temperature
=
0.0
,
)
assert
response_3
is
not
None
assert
response_3
.
status
==
"completed"
assert
response_3
.
output_text
is
not
None
# Count words in response (approximately, allowing for punctuation)
word_count
=
len
(
response_3
.
output_text
.
split
())
# Allow some flexibility (3-8 words) since the model might not be perfectly precise
assert
3
<=
word_count
<=
8
,
(
f
"Expected around 5 words, got
{
word_count
}
words:
{
response_3
.
output_text
}
"
)
vllm/entrypoints/openai/responses/serving.py
View file @
1b875656
...
@@ -980,7 +980,9 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -980,7 +980,9 @@ class OpenAIServingResponses(OpenAIServing):
output_items
.
extend
(
last_items
)
output_items
.
extend
(
last_items
)
return
output_items
return
output_items
def
_extract_system_message_from_request
(
self
,
request
)
->
str
|
None
:
def
_extract_system_message_from_request
(
self
,
request
:
ResponsesRequest
)
->
str
|
None
:
system_msg
=
None
system_msg
=
None
if
not
isinstance
(
request
.
input
,
str
):
if
not
isinstance
(
request
.
input
,
str
):
for
response_msg
in
request
.
input
:
for
response_msg
in
request
.
input
:
...
@@ -988,7 +990,17 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -988,7 +990,17 @@ class OpenAIServingResponses(OpenAIServing):
isinstance
(
response_msg
,
dict
)
isinstance
(
response_msg
,
dict
)
and
response_msg
.
get
(
"role"
)
==
"system"
and
response_msg
.
get
(
"role"
)
==
"system"
):
):
system_msg
=
response_msg
.
get
(
"content"
)
content
=
response_msg
.
get
(
"content"
)
if
isinstance
(
content
,
str
):
system_msg
=
content
elif
isinstance
(
content
,
list
):
for
param
in
content
:
if
(
isinstance
(
param
,
dict
)
and
param
.
get
(
"type"
)
==
"input_text"
):
system_msg
=
param
.
get
(
"text"
)
break
break
break
return
system_msg
return
system_msg
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment