Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
14fc7a68
Unverified
Commit 14fc7a68, authored Jan 10, 2026 by Ning Xie
Committed by GitHub on Jan 10, 2026
Browse files
[Bugfix] fix offline chat output prompt (#32076)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent
5f2385a4
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 13 additions and 7 deletions
+13
-7
examples/offline_inference/basic/chat.py
examples/offline_inference/basic/chat.py
+13
-7
No files found.
examples/offline_inference/basic/chat.py
View file @
14fc7a68
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
vllm
import
LLM
,
EngineArgs
from
vllm
import
LLM
,
EngineArgs
from
vllm.outputs
import
RequestOutput
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
...
@@ -44,12 +45,12 @@ def main(args: dict):
...
@@ -44,12 +45,12 @@ def main(args: dict):
if
top_k
is
not
None
:
if
top_k
is
not
None
:
sampling_params
.
top_k
=
top_k
sampling_params
.
top_k
=
top_k
def
print_outputs
(
outputs
):
def
print_outputs
(
outputs
:
list
[
RequestOutput
],
prompts
:
list
):
assert
len
(
outputs
)
==
len
(
prompts
)
print
(
"
\n
Generated Outputs:
\n
"
+
"-"
*
80
)
print
(
"
\n
Generated Outputs:
\n
"
+
"-"
*
80
)
for
output
in
outputs
:
for
i
,
output
in
enumerate
(
outputs
):
prompt
=
output
.
prompt
generated_text
=
output
.
outputs
[
0
].
text
generated_text
=
output
.
outputs
[
0
].
text
print
(
f
"Prompt:
{
prompt
!
r
}
\n
"
)
print
(
f
"Prompt:
{
prompt
s
[
i
]
!
r
}
\n
"
)
print
(
f
"Generated text:
{
generated_text
!
r
}
"
)
print
(
f
"Generated text:
{
generated_text
!
r
}
"
)
print
(
"-"
*
80
)
print
(
"-"
*
80
)
...
@@ -66,14 +67,19 @@ def main(args: dict):
...
@@ -66,14 +67,19 @@ def main(args: dict):
},
},
]
]
outputs
=
llm
.
chat
(
conversation
,
sampling_params
,
use_tqdm
=
False
)
outputs
=
llm
.
chat
(
conversation
,
sampling_params
,
use_tqdm
=
False
)
print_outputs
(
outputs
)
print_outputs
(
outputs
,
[
conversation
,
],
)
# You can run batch inference with llm.chat API
# You can run batch inference with llm.chat API
conversations
=
[
conversation
for
_
in
range
(
10
)]
conversations
=
[
conversation
for
_
in
range
(
10
)]
# We turn on tqdm progress bar to verify it's indeed running batch inference
# We turn on tqdm progress bar to verify it's indeed running batch inference
outputs
=
llm
.
chat
(
conversations
,
sampling_params
,
use_tqdm
=
True
)
outputs
=
llm
.
chat
(
conversations
,
sampling_params
,
use_tqdm
=
True
)
print_outputs
(
outputs
)
print_outputs
(
outputs
,
conversations
)
# A chat template can be optionally supplied.
# A chat template can be optionally supplied.
# If not, the model will use its default chat template.
# If not, the model will use its default chat template.
...
@@ -87,7 +93,7 @@ def main(args: dict):
...
@@ -87,7 +93,7 @@ def main(args: dict):
use_tqdm
=
False
,
use_tqdm
=
False
,
chat_template
=
chat_template
,
chat_template
=
chat_template
,
)
)
print_outputs
(
outputs
)
print_outputs
(
outputs
,
conversations
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment