Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e5cab715
Unverified
Commit
e5cab715
authored
Sep 06, 2024
by
afeldman-nm
Committed by
GitHub
Sep 06, 2024
Browse files
[Frontend] Add --logprobs argument to `benchmark_serving.py` (#8191)
parent
baa54675
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
1 deletion
+19
-1
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+2
-0
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+16
-0
tests/multi_step/test_correctness_llm.py
tests/multi_step/test_correctness_llm.py
+1
-1
No files found.
benchmarks/backend_request_func.py
View file @
e5cab715
...
@@ -24,6 +24,7 @@ class RequestFuncInput:
...
@@ -24,6 +24,7 @@ class RequestFuncInput:
model
:
str
model
:
str
best_of
:
int
=
1
best_of
:
int
=
1
use_beam_search
:
bool
=
False
use_beam_search
:
bool
=
False
logprobs
:
Optional
[
int
]
=
None
@
dataclass
@
dataclass
...
@@ -236,6 +237,7 @@ async def async_request_openai_completions(
...
@@ -236,6 +237,7 @@ async def async_request_openai_completions(
"temperature"
:
0.0
,
"temperature"
:
0.0
,
"best_of"
:
request_func_input
.
best_of
,
"best_of"
:
request_func_input
.
best_of
,
"max_tokens"
:
request_func_input
.
output_len
,
"max_tokens"
:
request_func_input
.
output_len
,
"logprobs"
:
request_func_input
.
logprobs
,
"stream"
:
True
,
"stream"
:
True
,
}
}
headers
=
{
headers
=
{
...
...
benchmarks/benchmark_serving.py
View file @
e5cab715
...
@@ -318,6 +318,7 @@ async def benchmark(
...
@@ -318,6 +318,7 @@ async def benchmark(
model_id
:
str
,
model_id
:
str
,
tokenizer
:
PreTrainedTokenizerBase
,
tokenizer
:
PreTrainedTokenizerBase
,
input_requests
:
List
[
Tuple
[
str
,
int
,
int
]],
input_requests
:
List
[
Tuple
[
str
,
int
,
int
]],
logprobs
:
Optional
[
int
],
best_of
:
int
,
best_of
:
int
,
use_beam_search
:
bool
,
use_beam_search
:
bool
,
request_rate
:
float
,
request_rate
:
float
,
...
@@ -339,6 +340,7 @@ async def benchmark(
...
@@ -339,6 +340,7 @@ async def benchmark(
api_url
=
api_url
,
api_url
=
api_url
,
prompt_len
=
test_prompt_len
,
prompt_len
=
test_prompt_len
,
output_len
=
test_output_len
,
output_len
=
test_output_len
,
logprobs
=
logprobs
,
best_of
=
best_of
,
best_of
=
best_of
,
use_beam_search
=
use_beam_search
,
use_beam_search
=
use_beam_search
,
)
)
...
@@ -358,6 +360,7 @@ async def benchmark(
...
@@ -358,6 +360,7 @@ async def benchmark(
api_url
=
base_url
+
"/start_profile"
,
api_url
=
base_url
+
"/start_profile"
,
prompt_len
=
test_prompt_len
,
prompt_len
=
test_prompt_len
,
output_len
=
test_output_len
,
output_len
=
test_output_len
,
logprobs
=
logprobs
,
best_of
=
best_of
,
best_of
=
best_of
,
use_beam_search
=
use_beam_search
,
use_beam_search
=
use_beam_search
,
)
)
...
@@ -379,6 +382,7 @@ async def benchmark(
...
@@ -379,6 +382,7 @@ async def benchmark(
api_url
=
api_url
,
api_url
=
api_url
,
prompt_len
=
prompt_len
,
prompt_len
=
prompt_len
,
output_len
=
output_len
,
output_len
=
output_len
,
logprobs
=
logprobs
,
best_of
=
best_of
,
best_of
=
best_of
,
use_beam_search
=
use_beam_search
,
use_beam_search
=
use_beam_search
,
)
)
...
@@ -396,6 +400,7 @@ async def benchmark(
...
@@ -396,6 +400,7 @@ async def benchmark(
api_url
=
base_url
+
"/stop_profile"
,
api_url
=
base_url
+
"/stop_profile"
,
prompt_len
=
test_prompt_len
,
prompt_len
=
test_prompt_len
,
output_len
=
test_output_len
,
output_len
=
test_output_len
,
logprobs
=
logprobs
,
best_of
=
best_of
,
best_of
=
best_of
,
use_beam_search
=
use_beam_search
,
use_beam_search
=
use_beam_search
,
)
)
...
@@ -580,6 +585,7 @@ def main(args: argparse.Namespace):
...
@@ -580,6 +585,7 @@ def main(args: argparse.Namespace):
model_id
=
model_id
,
model_id
=
model_id
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
input_requests
=
input_requests
,
input_requests
=
input_requests
,
logprobs
=
args
.
logprobs
,
best_of
=
args
.
best_of
,
best_of
=
args
.
best_of
,
use_beam_search
=
args
.
use_beam_search
,
use_beam_search
=
args
.
use_beam_search
,
request_rate
=
args
.
request_rate
,
request_rate
=
args
.
request_rate
,
...
@@ -721,6 +727,16 @@ if __name__ == "__main__":
...
@@ -721,6 +727,16 @@ if __name__ == "__main__":
help
=
help
=
"Number of output tokens per request, used only for sonnet dataset."
,
"Number of output tokens per request, used only for sonnet dataset."
,
)
)
parser
.
add_argument
(
"--logprobs"
,
type
=
int
,
default
=
None
,
help
=
(
"Number of logprobs-per-token to compute & return as part of "
"the request. If unspecified, then either (1) if beam search "
"is disabled, no logprobs are computed & a single dummy "
"logprob is returned for each token; or (2) if beam search "
"is enabled 1 logprob per token is computed"
),
)
parser
.
add_argument
(
parser
.
add_argument
(
"--sonnet-prefix-len"
,
"--sonnet-prefix-len"
,
type
=
int
,
type
=
int
,
...
...
tests/multi_step/test_correctness_llm.py
View file @
e5cab715
...
@@ -57,7 +57,7 @@ def test_multi_step_llm(
...
@@ -57,7 +57,7 @@ def test_multi_step_llm(
GPU -> CPU output transfer
GPU -> CPU output transfer
num_prompts: number of example prompts under test
num_prompts: number of example prompts under test
num_logprobs: corresponds to the `logprobs` argument to the OpenAI
num_logprobs: corresponds to the `logprobs` argument to the OpenAI
completions endpoint; `None` ->
no
logprob
s
completions endpoint; `None` ->
1
logprob
returned.
"""
"""
prompts
=
example_prompts
prompts
=
example_prompts
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment