Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cfe45320
Unverified
Commit
cfe45320
authored
Apr 28, 2025
by
Ekagra Ranjan
Committed by
GitHub
Apr 28, 2025
Browse files
[Benchmark] Add single turn MTBench to Serving Bench (#17202)
parent
8fc88d63
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
60 additions
and
3 deletions
+60
-3
benchmarks/benchmark_dataset.py
benchmarks/benchmark_dataset.py
+54
-0
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+6
-3
No files found.
benchmarks/benchmark_dataset.py
View file @
cfe45320
...
...
@@ -771,6 +771,60 @@ class InstructCoderDataset(HuggingFaceDataset):
return
sampled_requests
# -----------------------------------------------------------------------------
# MT-Bench Dataset Implementation
# -----------------------------------------------------------------------------
class MTBenchDataset(HuggingFaceDataset):
    """
    Single-turn MT-Bench dataset.
    https://huggingface.co/datasets/philschmid/mt-bench

    Only the first turn of every MT-Bench record is used as the prompt.
    This is similar to the speculative decoding benchmark setup in vLLM:
    https://github.com/vllm-project/vllm/blob/9d98ab5ec/examples/offline_inference/eagle.py#L14-L18
    """  # noqa: E501

    DEFAULT_OUTPUT_LEN = 256  # avg len used in SD bench in vLLM
    SUPPORTED_DATASET_PATHS = {
        "philschmid/mt-bench",
    }

    def sample(
        self,
        tokenizer: PreTrainedTokenizerBase,
        num_requests: int,
        output_len: Optional[int] = None,
        enable_multimodal_chat: bool = False,
        **kwargs,
    ) -> list:
        """Build up to ``num_requests`` single-turn requests from ``self.data``.

        Args:
            tokenizer: Used both to render the chat template and to count
                the tokens of the rendered prompt.
            num_requests: Number of requests to collect before the scan over
                ``self.data`` stops.
            output_len: Expected output length stored on every request;
                ``DEFAULT_OUTPUT_LEN`` is used when this is ``None``.
            enable_multimodal_chat: Accepted but not read by this method.
            **kwargs: Accepted but not read by this method.

        Returns:
            The list of ``SampleRequest`` objects that was collected.
        """
        if output_len is None:
            target_len = self.DEFAULT_OUTPUT_LEN
        else:
            target_len = output_len

        requests = []
        for record in self.data:
            # Stop scanning as soon as enough requests have been gathered.
            if num_requests <= len(requests):
                break

            # Single-turn setup: only the first user turn is kept.
            first_turn = record["turns"][0]
            conversation = [{"role": "user", "content": first_turn}]

            # Render the chat template to text (not token ids) so the
            # request carries a plain string prompt.
            rendered = tokenizer.apply_chat_template(
                conversation,
                add_generation_prompt=True,
                tokenize=False,
            )

            # Prompt length is measured on the rendered text.
            token_ids = tokenizer(rendered).input_ids
            request = SampleRequest(
                prompt=rendered,
                prompt_len=len(token_ids),
                expected_output_len=target_len,
            )
            requests.append(request)

        # NOTE(review): helper is defined outside this class; its return value
        # is ignored here, so it presumably extends the list in place when
        # fewer than num_requests were collected -- confirm on the base class.
        self.maybe_oversample_requests(requests, num_requests)
        return requests
# -----------------------------------------------------------------------------
# AIMO Dataset Implementation
# -----------------------------------------------------------------------------
...
...
benchmarks/benchmark_serving.py
View file @
cfe45320
...
...
@@ -52,9 +52,9 @@ except ImportError:
from
benchmark_dataset
import
(
AIMODataset
,
ASRDataset
,
BurstGPTDataset
,
ConversationDataset
,
HuggingFaceDataset
,
InstructCoderDataset
,
Random
Dataset
,
SampleRequest
,
ShareGPTDataset
,
SonnetDataset
,
VisionArenaDataset
)
InstructCoderDataset
,
MTBench
Dataset
,
RandomDataset
,
SampleRequest
,
ShareGPTDataset
,
SonnetDataset
,
VisionArenaDataset
)
from
benchmark_utils
import
convert_to_pytorch_benchmark_format
,
write_to_json
MILLISECONDS_TO_SECONDS_CONVERSION
=
1000
...
...
@@ -595,6 +595,9 @@ def main(args: argparse.Namespace):
elif
args
.
dataset_path
in
InstructCoderDataset
.
SUPPORTED_DATASET_PATHS
:
dataset_class
=
InstructCoderDataset
args
.
hf_split
=
"train"
elif
args
.
dataset_path
in
MTBenchDataset
.
SUPPORTED_DATASET_PATHS
:
dataset_class
=
MTBenchDataset
args
.
hf_split
=
"train"
elif
args
.
dataset_path
in
ConversationDataset
.
SUPPORTED_DATASET_PATHS
:
dataset_class
=
ConversationDataset
elif
args
.
dataset_path
in
AIMODataset
.
SUPPORTED_DATASET_PATHS
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment