Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f510395b
Unverified
Commit
f510395b
authored
Mar 30, 2024
by
Roy
Committed by
GitHub
Mar 29, 2024
Browse files
[BugFix][Frontend] Fix completion logprobs=0 error (#3731)
parent
6110c39d
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
26 additions
and
7 deletions
+26
-7
tests/entrypoints/test_openai_server.py
tests/entrypoints/test_openai_server.py
+21
-0
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+1
-1
vllm/entrypoints/openai/serving_completion.py
vllm/entrypoints/openai/serving_completion.py
+0
-3
vllm/model_executor/layers/sampler.py
vllm/model_executor/layers/sampler.py
+3
-2
vllm/outputs.py
vllm/outputs.py
+1
-1
No files found.
tests/entrypoints/test_openai_server.py
View file @
f510395b
...
@@ -199,6 +199,27 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
...
@@ -199,6 +199,27 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
completion
.
choices
[
0
].
text
)
>=
5
completion
.
choices
[
0
].
text
)
>=
5
@
pytest
.
mark
.
parametrize
(
# first test base model, then test loras
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
,
"zephyr-lora2"
],
)
async
def
test_zero_logprobs
(
server
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
# test using token IDs
completion
=
await
client
.
completions
.
create
(
model
=
MODEL_NAME
,
prompt
=
[
0
,
0
,
0
,
0
,
0
],
max_tokens
=
5
,
temperature
=
0.0
,
logprobs
=
0
,
)
choice
=
completion
.
choices
[
0
]
assert
choice
.
logprobs
is
not
None
assert
choice
.
logprobs
.
token_logprobs
is
not
None
assert
choice
.
logprobs
.
top_logprobs
is
None
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
# just test 1 lora hereafter
# just test 1 lora hereafter
"model_name"
,
"model_name"
,
...
...
vllm/entrypoints/openai/protocol.py
View file @
f510395b
...
@@ -330,7 +330,7 @@ class LogProbs(BaseModel):
...
@@ -330,7 +330,7 @@ class LogProbs(BaseModel):
text_offset
:
List
[
int
]
=
Field
(
default_factory
=
list
)
text_offset
:
List
[
int
]
=
Field
(
default_factory
=
list
)
token_logprobs
:
List
[
Optional
[
float
]]
=
Field
(
default_factory
=
list
)
token_logprobs
:
List
[
Optional
[
float
]]
=
Field
(
default_factory
=
list
)
tokens
:
List
[
str
]
=
Field
(
default_factory
=
list
)
tokens
:
List
[
str
]
=
Field
(
default_factory
=
list
)
top_logprobs
:
Optional
[
List
[
Optional
[
Dict
[
int
,
float
]]]]
=
None
top_logprobs
:
Optional
[
List
[
Optional
[
Dict
[
str
,
float
]]]]
=
None
class
CompletionResponseChoice
(
BaseModel
):
class
CompletionResponseChoice
(
BaseModel
):
...
...
vllm/entrypoints/openai/serving_completion.py
View file @
f510395b
...
@@ -251,9 +251,6 @@ class OpenAIServingCompletion(OpenAIServing):
...
@@ -251,9 +251,6 @@ class OpenAIServingCompletion(OpenAIServing):
i
]:]
if
output
.
logprobs
else
None
i
]:]
if
output
.
logprobs
else
None
if
request
.
logprobs
is
not
None
:
if
request
.
logprobs
is
not
None
:
assert
top_logprobs
is
not
None
,
(
"top_logprobs must be provided when logprobs "
"is requested"
)
logprobs
=
self
.
_create_logprobs
(
logprobs
=
self
.
_create_logprobs
(
token_ids
=
delta_token_ids
,
token_ids
=
delta_token_ids
,
top_logprobs
=
top_logprobs
,
top_logprobs
=
top_logprobs
,
...
...
vllm/model_executor/layers/sampler.py
View file @
f510395b
...
@@ -534,7 +534,8 @@ def _get_logprobs(
...
@@ -534,7 +534,8 @@ def _get_logprobs(
# Prepare query indices
# Prepare query indices
batched_logprobs_query_seq_indices
:
List
[
int
]
=
[]
batched_logprobs_query_seq_indices
:
List
[
int
]
=
[]
batched_logprobs_query_token_indices
:
List
[
int
]
=
[]
batched_logprobs_query_token_indices
:
List
[
int
]
=
[]
largest_num_logprobs
=
0
# at least get one logprob for each token
largest_num_logprobs
=
1
sample_idx
=
0
sample_idx
=
0
for
i
,
(
seq_group
,
sample_result
)
in
enumerate
(
for
i
,
(
seq_group
,
sample_result
)
in
enumerate
(
zip
(
sampling_metadata
.
seq_groups
,
sample_results
)):
zip
(
sampling_metadata
.
seq_groups
,
sample_results
)):
...
@@ -643,7 +644,7 @@ def _get_logprobs(
...
@@ -643,7 +644,7 @@ def _get_logprobs(
batched_ranks_query_result
[
query_result_idx
].
item
())
batched_ranks_query_result
[
query_result_idx
].
item
())
}
}
query_result_idx
+=
1
query_result_idx
+=
1
if
num_logprobs
>
0
:
if
num_logprobs
>
=
0
:
sample_logprobs_dict
.
update
(
sample_logprobs_dict
.
update
(
zip
(
zip
(
top_token_ids
[
sample_idx
+
top_token_ids
[
sample_idx
+
...
...
vllm/outputs.py
View file @
f510395b
...
@@ -111,7 +111,7 @@ class RequestOutput:
...
@@ -111,7 +111,7 @@ class RequestOutput:
# NOTE: We need omit logprobs here explicitly because the sequence
# NOTE: We need omit logprobs here explicitly because the sequence
# always has the logprobs of the sampled tokens even if the
# always has the logprobs of the sampled tokens even if the
# logprobs are not requested.
# logprobs are not requested.
include_logprobs
=
seq_group
.
sampling_params
.
logprobs
include_logprobs
=
seq_group
.
sampling_params
.
logprobs
is
not
None
outputs
=
[
outputs
=
[
CompletionOutput
(
seqs
.
index
(
seq
),
seq
.
output_text
,
CompletionOutput
(
seqs
.
index
(
seq
),
seq
.
output_text
,
seq
.
get_output_token_ids
(),
seq
.
get_output_token_ids
(),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment