Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b88be221
Unverified
Commit
b88be221
authored
Mar 19, 2025
by
Jennifer Zhao
Committed by
GitHub
Mar 20, 2025
Browse files
[Benchmark] Allow oversample request in benchmark dataset (#15170)
Signed-off-by:
Jennifer Zhao
<
ai.jenniferzhao@gmail.com
>
parent
d8c6d7d6
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
139 additions
and
59 deletions
+139
-59
benchmarks/README.md
benchmarks/README.md
+54
-3
benchmarks/benchmark_dataset.py
benchmarks/benchmark_dataset.py
+85
-56
No files found.
benchmarks/README.md
View file @
b88be221
...
...
@@ -42,7 +42,7 @@ become available.
</tr>
<tr>
<td><strong>
HuggingFace
</strong></td>
<td
style=
"text-align: center;"
>
✅
</td>
<td
style=
"text-align: center;"
>
🟡
</td>
<td
style=
"text-align: center;"
>
🟡
</td>
<td>
Specify your dataset path on HuggingFace
</td>
</tr>
...
...
@@ -60,8 +60,8 @@ become available.
🚧: to be supported
🟡: Partial support. Currently, HuggingFaceDataset only supports dataset formats
similar to
`lmms-lab/LLaVA-OneVision-Data`
. If you need support for other dataset
formats, please consider contributing.
similar to
`lmms-lab/LLaVA-OneVision-Data`
and
`Aeala/ShareGPT_Vicuna_unfiltered`
.
If you need support for other dataset
formats, please consider contributing.
**Note**
: VisionArena’s
`dataset-name`
should be set to
`hf`
...
...
@@ -139,6 +139,57 @@ python3 vllm/benchmarks/benchmark_serving.py \
--num-prompts
"
${
NUM_PROMPTS
}
"
```
### HuggingFaceDataset Examples
Currently, HuggingFaceDataset only supports dataset formats
similar to
`lmms-lab/LLaVA-OneVision-Data`
and
`Aeala/ShareGPT_Vicuna_unfiltered`
. If you need support for other dataset
formats, please consider contributing.
```
bash
# need a model with vision capability here
vllm serve Qwen/Qwen2-VL-7B-Instruct
--disable-log-requests
```
**`lmms-lab/LLaVA-OneVision-Data`**
```
bash
MODEL_NAME
=
"Qwen/Qwen2-VL-7B-Instruct"
NUM_PROMPTS
=
10
BACKEND
=
"openai-chat"
DATASET_NAME
=
"hf"
DATASET_PATH
=
"lmms-lab/LLaVA-OneVision-Data"
DATASET_SPLIT
=
'train'
DATASET_SUBSET
=
'chart2text(cauldron)'
python3 vllm/benchmarks/benchmark_serving.py
\
--backend
"
${
BACKEND
}
"
\
--model
"
${
MODEL_NAME
}
"
\
--endpoint
"/v1/chat/completions"
\
--dataset-name
"
${
DATASET_NAME
}
"
\
--dataset-path
"
${
DATASET_PATH
}
"
\
--hf-split
"
${
DATASET_SPLIT
}
"
\
--num-prompts
"
${
NUM_PROMPTS
}
"
\
--hf-subset
"
${
DATASET_SUBSET
}
"
```
**`Aeala/ShareGPT_Vicuna_unfiltered`**
```
bash
MODEL_NAME
=
"Qwen/Qwen2-VL-7B-Instruct"
NUM_PROMPTS
=
10
BACKEND
=
"openai-chat"
DATASET_NAME
=
"hf"
DATASET_PATH
=
"Aeala/ShareGPT_Vicuna_unfiltered"
DATASET_SPLIT
=
'train'
python3 vllm/benchmarks/benchmark_serving.py
\
--backend
"
${
BACKEND
}
"
\
--model
"
${
MODEL_NAME
}
"
\
--endpoint
"/v1/chat/completions"
\
--dataset-name
"
${
DATASET_NAME
}
"
\
--dataset-path
"
${
DATASET_PATH
}
"
\
--hf-split
"
${
DATASET_SPLIT
}
"
\
--num-prompts
"
${
NUM_PROMPTS
}
"
```
---
## Example - Offline Throughput Benchmark
...
...
benchmarks/benchmark_dataset.py
View file @
b88be221
...
...
@@ -17,6 +17,7 @@ SampleRequest instances, similar to the approach used in ShareGPT.
import
base64
import
io
import
json
import
logging
import
random
from
abc
import
ABC
,
abstractmethod
from
collections.abc
import
Mapping
...
...
@@ -35,6 +36,8 @@ from vllm.lora.utils import get_adapter_absolute_path
from
vllm.multimodal
import
MultiModalDataDict
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
get_lora_tokenizer
logger
=
logging
.
getLogger
(
__name__
)
# -----------------------------------------------------------------------------
# Data Classes
# -----------------------------------------------------------------------------
...
...
@@ -61,9 +64,6 @@ class SampleRequest:
class
BenchmarkDataset
(
ABC
):
DEFAULT_SEED
=
0
# num_requests has default 1000 in both the benchmark_serving.py and
# benchmark_throughput.py
def
__init__
(
self
,
dataset_path
:
Optional
[
str
]
=
None
,
...
...
@@ -90,8 +90,8 @@ class BenchmarkDataset(ABC):
mm_content
:
Optional
[
MultiModalDataDict
]
=
None
)
->
list
[
dict
]:
"""
Transform a prompt and optional multimodal content into a chat format.
This method is used for chat models that expect a specific
conversation
format.
This method is used for chat models that expect a specific
conversation
format.
"""
content
=
[{
"text"
:
prompt
,
"type"
:
"text"
}]
if
mm_content
is
not
None
:
...
...
@@ -175,6 +175,24 @@ class BenchmarkDataset(ABC):
"""
raise
NotImplementedError
(
"sample must be implemented in subclasses."
)
def maybe_oversample_requests(self, requests: list[SampleRequest],
                              num_requests: int) -> None:
    """
    Oversample the list of requests in place if it is shorter than the
    desired number.

    Missing entries are drawn (with replacement) from the requests that
    are already present, after seeding the module-level ``random``
    generator with ``self.random_seed``.

    Args:
        requests (List[SampleRequest]): The current list of sampled
            requests. It is extended in place.
        num_requests (int): The target number of requests.
    """
    shortfall = num_requests - len(requests)
    if shortfall <= 0:
        # Already have enough requests; nothing to do.
        return

    if not requests:
        # random.choices() raises IndexError on an empty population, so
        # there is nothing to draw from. Leave the list empty and let the
        # caller deal with it instead of crashing here.
        logger.warning(
            "Cannot oversample to %d requests: no requests were sampled.",
            num_requests)
        return

    # NOTE: this reseeds the global `random` state, not a private generator.
    random.seed(self.random_seed)
    requests.extend(random.choices(requests, k=shortfall))
    logger.info("Oversampled requests to reach %d total samples.",
                num_requests)
# -----------------------------------------------------------------------------
# Utility Functions and Global Caches
...
...
@@ -276,15 +294,16 @@ class RandomDataset(BenchmarkDataset):
)
->
None
:
super
().
__init__
(
**
kwargs
)
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
prefix_len
:
int
=
DEFAULT_PREFIX_LEN
,
range_ratio
:
float
=
DEFAULT_RANGE_RATIO
,
input_len
:
int
=
DEFAULT_INPUT_LEN
,
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
**
kwargs
)
->
list
[
SampleRequest
]:
**
kwargs
,
)
->
list
[
SampleRequest
]:
vocab_size
=
tokenizer
.
vocab_size
prefix_token_ids
=
(
np
.
random
.
randint
(
...
...
@@ -346,20 +365,24 @@ class ShareGPTDataset(BenchmarkDataset):
random
.
seed
(
self
.
random_seed
)
random
.
shuffle
(
self
.
data
)
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
lora_path
:
Optional
[
str
]
=
None
,
max_loras
:
Optional
[
int
]
=
None
,
output_len
:
Optional
[
int
]
=
None
,
enable_multimodal_chat
:
bool
=
False
,
**
kwargs
)
->
list
:
**
kwargs
,
)
->
list
:
samples
:
list
=
[]
for
entry
in
self
.
data
:
if
len
(
samples
)
>=
num_requests
:
break
prompt
,
completion
=
entry
[
"conversations"
][
0
][
"value"
],
\
entry
[
"conversations"
][
1
][
"value"
]
prompt
,
completion
=
(
entry
[
"conversations"
][
0
][
"value"
],
entry
[
"conversations"
][
1
][
"value"
],
)
lora_request
,
tokenizer
=
self
.
get_random_lora_request
(
tokenizer
=
tokenizer
,
max_loras
=
max_loras
,
lora_path
=
lora_path
)
...
...
@@ -383,6 +406,7 @@ class ShareGPTDataset(BenchmarkDataset):
expected_output_len
=
new_output_len
,
lora_request
=
lora_request
,
))
self
.
maybe_oversample_requests
(
samples
,
num_requests
)
return
samples
...
...
@@ -415,19 +439,20 @@ class SonnetDataset(BenchmarkDataset):
with
open
(
self
.
dataset_path
,
encoding
=
"utf-8"
)
as
f
:
self
.
data
=
f
.
readlines
()
def
sample
(
self
,
def
sample
(
self
,
tokenizer
,
num_requests
:
int
,
prefix_len
:
int
=
DEFAULT_PREFIX_LEN
,
input_len
:
int
=
DEFAULT_INPUT_LEN
,
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
return_prompt_formatted
:
bool
=
False
,
**
kwargs
)
->
list
:
**
kwargs
,
)
->
list
:
# Calculate average token length for a poem line.
tokenized_lines
=
[
tokenizer
(
line
).
input_ids
for
line
in
self
.
data
]
avg_len
=
sum
(
len
(
tokens
)
for
tokens
in
\
tokenized_lines
)
/
len
(
tokenized_lines
)
for
tokens
in
tokenized_lines
)
/
len
(
tokenized_lines
)
# Build the base prompt.
base_prompt
=
"Pick as many lines as you can from these poem lines:
\n
"
...
...
@@ -506,12 +531,14 @@ class BurstGPTDataset(BenchmarkDataset):
# Convert the dataframe to a list of lists.
return
data
.
values
.
tolist
()
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
max_loras
:
Optional
[
int
]
=
None
,
lora_path
:
Optional
[
str
]
=
None
,
**
kwargs
)
->
list
[
SampleRequest
]:
**
kwargs
,
)
->
list
[
SampleRequest
]:
samples
=
[]
data
=
self
.
_sample_loaded_data
(
num_requests
=
num_requests
)
for
i
in
range
(
num_requests
):
...
...
@@ -544,7 +571,6 @@ class HuggingFaceDataset(BenchmarkDataset):
Dataset class for processing a HuggingFace dataset with conversation data
and optional images.
"""
DEFAULT_NUM_REQUESTS
=
1000
def
__init__
(
self
,
...
...
@@ -618,6 +644,7 @@ class HuggingFaceDataset(BenchmarkDataset):
expected_output_len
=
output_len
,
multi_modal_data
=
mm_content
,
))
self
.
maybe_oversample_requests
(
sampled_requests
,
num_requests
)
return
sampled_requests
...
...
@@ -632,7 +659,6 @@ class VisionArenaDataset(HuggingFaceDataset):
"""
DEFAULT_OUTPUT_LEN
=
128
DEFAULT_NUM_REQUESTS
=
1000
VISION_ARENA_DATASET_PATH
=
"lmarena-ai/vision-arena-bench-v0.1"
def
__init__
(
...
...
@@ -657,12 +683,14 @@ class VisionArenaDataset(HuggingFaceDataset):
)
self
.
data
=
dataset
.
shuffle
(
seed
=
self
.
random_seed
)
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
output_len
:
Optional
[
int
]
=
None
,
enable_multimodal_chat
:
bool
=
False
,
**
kwargs
)
->
list
:
**
kwargs
,
)
->
list
:
output_len
=
(
output_len
if
output_len
is
not
None
else
self
.
DEFAULT_OUTPUT_LEN
)
sampled_requests
=
[]
...
...
@@ -685,4 +713,5 @@ class VisionArenaDataset(HuggingFaceDataset):
expected_output_len
=
output_len
,
multi_modal_data
=
mm_content
,
))
self
.
maybe_oversample_requests
(
sampled_requests
,
num_requests
)
return
sampled_requests
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment