Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b88be221
Unverified
Commit
b88be221
authored
Mar 19, 2025
by
Jennifer Zhao
Committed by
GitHub
Mar 20, 2025
Browse files
[Benchmark] Allow oversample request in benchmark dataset (#15170)
Signed-off-by:
Jennifer Zhao
<
ai.jenniferzhao@gmail.com
>
parent
d8c6d7d6
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
139 additions
and
59 deletions
+139
-59
benchmarks/README.md
benchmarks/README.md
+54
-3
benchmarks/benchmark_dataset.py
benchmarks/benchmark_dataset.py
+85
-56
No files found.
benchmarks/README.md
View file @
b88be221
...
@@ -42,7 +42,7 @@ become available.
...
@@ -42,7 +42,7 @@ become available.
</tr>
</tr>
<tr>
<tr>
<td><strong>
HuggingFace
</strong></td>
<td><strong>
HuggingFace
</strong></td>
<td
style=
"text-align: center;"
>
✅
</td>
<td
style=
"text-align: center;"
>
🟡
</td>
<td
style=
"text-align: center;"
>
🟡
</td>
<td
style=
"text-align: center;"
>
🟡
</td>
<td>
Specify your dataset path on HuggingFace
</td>
<td>
Specify your dataset path on HuggingFace
</td>
</tr>
</tr>
...
@@ -60,8 +60,8 @@ become available.
...
@@ -60,8 +60,8 @@ become available.
🚧: to be supported
🚧: to be supported
🟡: Partial support. Currently, HuggingFaceDataset only supports dataset formats
🟡: Partial support. Currently, HuggingFaceDataset only supports dataset formats
similar to
`lmms-lab/LLaVA-OneVision-Data`
. If you need support for other dataset
similar to
`lmms-lab/LLaVA-OneVision-Data`
and
`Aeala/ShareGPT_Vicuna_unfiltered`
.
formats, please consider contributing.
If you need support for other dataset
formats, please consider contributing.
**Note**
: VisionArena’s
`dataset-name`
should be set to
`hf`
**Note**
: VisionArena’s
`dataset-name`
should be set to
`hf`
...
@@ -139,6 +139,57 @@ python3 vllm/benchmarks/benchmark_serving.py \
...
@@ -139,6 +139,57 @@ python3 vllm/benchmarks/benchmark_serving.py \
--num-prompts
"
${
NUM_PROMPTS
}
"
--num-prompts
"
${
NUM_PROMPTS
}
"
```
```
### HuggingFaceDataset Examples
Currently, HuggingFaceDataset only supports dataset formats
similar to
`lmms-lab/LLaVA-OneVision-Data`
and
`Aeala/ShareGPT_Vicuna_unfiltered`
. If you need support for other dataset
formats, please consider contributing.
```
bash
# need a model with vision capability here
vllm serve Qwen/Qwen2-VL-7B-Instruct
--disable-log-requests
```
**`lmms-lab/LLaVA-OneVision-Data`**
```
bash
MODEL_NAME
=
"Qwen/Qwen2-VL-7B-Instruct"
NUM_PROMPTS
=
10
BACKEND
=
"openai-chat"
DATASET_NAME
=
"hf"
DATASET_PATH
=
"lmms-lab/LLaVA-OneVision-Data"
DATASET_SPLIT
=
'train'
DATASET_SUBSET
=
'chart2text(cauldron)'
python3 vllm/benchmarks/benchmark_serving.py
\
--backend
"
${
BACKEND
}
"
\
--model
"
${
MODEL_NAME
}
"
\
--endpoint
"/v1/chat/completions"
\
--dataset-name
"
${
DATASET_NAME
}
"
\
--dataset-path
"
${
DATASET_PATH
}
"
\
--hf-split
"
${
DATASET_SPLIT
}
"
\
--num-prompts
"
${
NUM_PROMPTS
}
"
\
--hf-subset
"
${
DATASET_SUBSET
}
"
```
**`Aeala/ShareGPT_Vicuna_unfiltered`**
```
bash
MODEL_NAME
=
"Qwen/Qwen2-VL-7B-Instruct"
NUM_PROMPTS
=
10
BACKEND
=
"openai-chat"
DATASET_NAME
=
"hf"
DATASET_PATH
=
"Aeala/ShareGPT_Vicuna_unfiltered"
DATASET_SPLIT
=
'train'
python3 vllm/benchmarks/benchmark_serving.py
\
--backend
"
${
BACKEND
}
"
\
--model
"
${
MODEL_NAME
}
"
\
--endpoint
"/v1/chat/completions"
\
--dataset-name
"
${
DATASET_NAME
}
"
\
--dataset-path
"
${
DATASET_PATH
}
"
\
--hf-split
"
${
DATASET_SPLIT
}
"
\
--num-prompts
"
${
NUM_PROMPTS
}
"
```
---
---
## Example - Offline Throughput Benchmark
## Example - Offline Throughput Benchmark
...
...
benchmarks/benchmark_dataset.py
View file @
b88be221
...
@@ -17,6 +17,7 @@ SampleRequest instances, similar to the approach used in ShareGPT.
...
@@ -17,6 +17,7 @@ SampleRequest instances, similar to the approach used in ShareGPT.
import
base64
import
base64
import
io
import
io
import
json
import
json
import
logging
import
random
import
random
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
collections.abc
import
Mapping
from
collections.abc
import
Mapping
...
@@ -35,6 +36,8 @@ from vllm.lora.utils import get_adapter_absolute_path
...
@@ -35,6 +36,8 @@ from vllm.lora.utils import get_adapter_absolute_path
from
vllm.multimodal
import
MultiModalDataDict
from
vllm.multimodal
import
MultiModalDataDict
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
get_lora_tokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
get_lora_tokenizer
logger
=
logging
.
getLogger
(
__name__
)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Data Classes
# Data Classes
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
...
@@ -61,9 +64,6 @@ class SampleRequest:
...
@@ -61,9 +64,6 @@ class SampleRequest:
class
BenchmarkDataset
(
ABC
):
class
BenchmarkDataset
(
ABC
):
DEFAULT_SEED
=
0
DEFAULT_SEED
=
0
# num_requests has default 1000 in both the benchmark_serving.py and
# benchmark_throughput.py
def
__init__
(
def
__init__
(
self
,
self
,
dataset_path
:
Optional
[
str
]
=
None
,
dataset_path
:
Optional
[
str
]
=
None
,
...
@@ -90,8 +90,8 @@ class BenchmarkDataset(ABC):
...
@@ -90,8 +90,8 @@ class BenchmarkDataset(ABC):
mm_content
:
Optional
[
MultiModalDataDict
]
=
None
)
->
list
[
dict
]:
mm_content
:
Optional
[
MultiModalDataDict
]
=
None
)
->
list
[
dict
]:
"""
"""
Transform a prompt and optional multimodal content into a chat format.
Transform a prompt and optional multimodal content into a chat format.
This method is used for chat models that expect a specific
This method is used for chat models that expect a specific
conversation
conversation
format.
format.
"""
"""
content
=
[{
"text"
:
prompt
,
"type"
:
"text"
}]
content
=
[{
"text"
:
prompt
,
"type"
:
"text"
}]
if
mm_content
is
not
None
:
if
mm_content
is
not
None
:
...
@@ -175,6 +175,24 @@ class BenchmarkDataset(ABC):
...
@@ -175,6 +175,24 @@ class BenchmarkDataset(ABC):
"""
"""
raise
NotImplementedError
(
"sample must be implemented in subclasses."
)
raise
NotImplementedError
(
"sample must be implemented in subclasses."
)
def maybe_oversample_requests(self, requests: list[SampleRequest],
                              num_requests: int) -> None:
    """
    Oversample the list of requests in place if it holds fewer entries
    than the desired number.

    Extra entries are drawn with replacement from the requests already in
    the list, after seeding the global ``random`` module with
    ``self.random_seed`` so the draw is reproducible. The list is left
    untouched when it already holds at least ``num_requests`` entries.

    Args:
        requests (list[SampleRequest]): The current list of sampled
            requests; extended in place.
        num_requests (int): The target number of requests.
    """
    shortfall = num_requests - len(requests)
    if shortfall <= 0:
        return

    if not requests:
        # random.choices() raises IndexError on an empty population, so
        # there is nothing to duplicate; report it instead of crashing.
        logger.warning(
            "Cannot oversample to %d requests: no sampled requests "
            "to draw from.", num_requests)
        return

    random.seed(self.random_seed)
    requests.extend(random.choices(requests, k=shortfall))
    logger.info("Oversampled requests to reach %d total samples.",
                num_requests)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Utility Functions and Global Caches
# Utility Functions and Global Caches
...
@@ -276,15 +294,16 @@ class RandomDataset(BenchmarkDataset):
...
@@ -276,15 +294,16 @@ class RandomDataset(BenchmarkDataset):
)
->
None
:
)
->
None
:
super
().
__init__
(
**
kwargs
)
super
().
__init__
(
**
kwargs
)
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
num_requests
:
int
,
prefix_len
:
int
=
DEFAULT_PREFIX_LEN
,
prefix_len
:
int
=
DEFAULT_PREFIX_LEN
,
range_ratio
:
float
=
DEFAULT_RANGE_RATIO
,
range_ratio
:
float
=
DEFAULT_RANGE_RATIO
,
input_len
:
int
=
DEFAULT_INPUT_LEN
,
input_len
:
int
=
DEFAULT_INPUT_LEN
,
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
**
kwargs
)
->
list
[
SampleRequest
]:
**
kwargs
,
)
->
list
[
SampleRequest
]:
vocab_size
=
tokenizer
.
vocab_size
vocab_size
=
tokenizer
.
vocab_size
prefix_token_ids
=
(
np
.
random
.
randint
(
prefix_token_ids
=
(
np
.
random
.
randint
(
...
@@ -346,20 +365,24 @@ class ShareGPTDataset(BenchmarkDataset):
...
@@ -346,20 +365,24 @@ class ShareGPTDataset(BenchmarkDataset):
random
.
seed
(
self
.
random_seed
)
random
.
seed
(
self
.
random_seed
)
random
.
shuffle
(
self
.
data
)
random
.
shuffle
(
self
.
data
)
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
num_requests
:
int
,
lora_path
:
Optional
[
str
]
=
None
,
lora_path
:
Optional
[
str
]
=
None
,
max_loras
:
Optional
[
int
]
=
None
,
max_loras
:
Optional
[
int
]
=
None
,
output_len
:
Optional
[
int
]
=
None
,
output_len
:
Optional
[
int
]
=
None
,
enable_multimodal_chat
:
bool
=
False
,
enable_multimodal_chat
:
bool
=
False
,
**
kwargs
)
->
list
:
**
kwargs
,
)
->
list
:
samples
:
list
=
[]
samples
:
list
=
[]
for
entry
in
self
.
data
:
for
entry
in
self
.
data
:
if
len
(
samples
)
>=
num_requests
:
if
len
(
samples
)
>=
num_requests
:
break
break
prompt
,
completion
=
entry
[
"conversations"
][
0
][
"value"
],
\
prompt
,
completion
=
(
entry
[
"conversations"
][
1
][
"value"
]
entry
[
"conversations"
][
0
][
"value"
],
entry
[
"conversations"
][
1
][
"value"
],
)
lora_request
,
tokenizer
=
self
.
get_random_lora_request
(
lora_request
,
tokenizer
=
self
.
get_random_lora_request
(
tokenizer
=
tokenizer
,
max_loras
=
max_loras
,
lora_path
=
lora_path
)
tokenizer
=
tokenizer
,
max_loras
=
max_loras
,
lora_path
=
lora_path
)
...
@@ -383,6 +406,7 @@ class ShareGPTDataset(BenchmarkDataset):
...
@@ -383,6 +406,7 @@ class ShareGPTDataset(BenchmarkDataset):
expected_output_len
=
new_output_len
,
expected_output_len
=
new_output_len
,
lora_request
=
lora_request
,
lora_request
=
lora_request
,
))
))
self
.
maybe_oversample_requests
(
samples
,
num_requests
)
return
samples
return
samples
...
@@ -415,19 +439,20 @@ class SonnetDataset(BenchmarkDataset):
...
@@ -415,19 +439,20 @@ class SonnetDataset(BenchmarkDataset):
with
open
(
self
.
dataset_path
,
encoding
=
"utf-8"
)
as
f
:
with
open
(
self
.
dataset_path
,
encoding
=
"utf-8"
)
as
f
:
self
.
data
=
f
.
readlines
()
self
.
data
=
f
.
readlines
()
def
sample
(
self
,
def
sample
(
self
,
tokenizer
,
tokenizer
,
num_requests
:
int
,
num_requests
:
int
,
prefix_len
:
int
=
DEFAULT_PREFIX_LEN
,
prefix_len
:
int
=
DEFAULT_PREFIX_LEN
,
input_len
:
int
=
DEFAULT_INPUT_LEN
,
input_len
:
int
=
DEFAULT_INPUT_LEN
,
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
return_prompt_formatted
:
bool
=
False
,
return_prompt_formatted
:
bool
=
False
,
**
kwargs
)
->
list
:
**
kwargs
,
)
->
list
:
# Calculate average token length for a poem line.
# Calculate average token length for a poem line.
tokenized_lines
=
[
tokenizer
(
line
).
input_ids
for
line
in
self
.
data
]
tokenized_lines
=
[
tokenizer
(
line
).
input_ids
for
line
in
self
.
data
]
avg_len
=
sum
(
len
(
tokens
)
avg_len
=
sum
(
len
(
tokens
)
for
tokens
in
\
for
tokens
in
tokenized_lines
)
/
len
(
tokenized_lines
)
tokenized_lines
)
/
len
(
tokenized_lines
)
# Build the base prompt.
# Build the base prompt.
base_prompt
=
"Pick as many lines as you can from these poem lines:
\n
"
base_prompt
=
"Pick as many lines as you can from these poem lines:
\n
"
...
@@ -506,12 +531,14 @@ class BurstGPTDataset(BenchmarkDataset):
...
@@ -506,12 +531,14 @@ class BurstGPTDataset(BenchmarkDataset):
# Convert the dataframe to a list of lists.
# Convert the dataframe to a list of lists.
return
data
.
values
.
tolist
()
return
data
.
values
.
tolist
()
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
num_requests
:
int
,
max_loras
:
Optional
[
int
]
=
None
,
max_loras
:
Optional
[
int
]
=
None
,
lora_path
:
Optional
[
str
]
=
None
,
lora_path
:
Optional
[
str
]
=
None
,
**
kwargs
)
->
list
[
SampleRequest
]:
**
kwargs
,
)
->
list
[
SampleRequest
]:
samples
=
[]
samples
=
[]
data
=
self
.
_sample_loaded_data
(
num_requests
=
num_requests
)
data
=
self
.
_sample_loaded_data
(
num_requests
=
num_requests
)
for
i
in
range
(
num_requests
):
for
i
in
range
(
num_requests
):
...
@@ -544,7 +571,6 @@ class HuggingFaceDataset(BenchmarkDataset):
...
@@ -544,7 +571,6 @@ class HuggingFaceDataset(BenchmarkDataset):
Dataset class for processing a HuggingFace dataset with conversation data
Dataset class for processing a HuggingFace dataset with conversation data
and optional images.
and optional images.
"""
"""
DEFAULT_NUM_REQUESTS
=
1000
def
__init__
(
def
__init__
(
self
,
self
,
...
@@ -618,6 +644,7 @@ class HuggingFaceDataset(BenchmarkDataset):
...
@@ -618,6 +644,7 @@ class HuggingFaceDataset(BenchmarkDataset):
expected_output_len
=
output_len
,
expected_output_len
=
output_len
,
multi_modal_data
=
mm_content
,
multi_modal_data
=
mm_content
,
))
))
self
.
maybe_oversample_requests
(
sampled_requests
,
num_requests
)
return
sampled_requests
return
sampled_requests
...
@@ -632,7 +659,6 @@ class VisionArenaDataset(HuggingFaceDataset):
...
@@ -632,7 +659,6 @@ class VisionArenaDataset(HuggingFaceDataset):
"""
"""
DEFAULT_OUTPUT_LEN
=
128
DEFAULT_OUTPUT_LEN
=
128
DEFAULT_NUM_REQUESTS
=
1000
VISION_ARENA_DATASET_PATH
=
"lmarena-ai/vision-arena-bench-v0.1"
VISION_ARENA_DATASET_PATH
=
"lmarena-ai/vision-arena-bench-v0.1"
def
__init__
(
def
__init__
(
...
@@ -657,12 +683,14 @@ class VisionArenaDataset(HuggingFaceDataset):
...
@@ -657,12 +683,14 @@ class VisionArenaDataset(HuggingFaceDataset):
)
)
self
.
data
=
dataset
.
shuffle
(
seed
=
self
.
random_seed
)
self
.
data
=
dataset
.
shuffle
(
seed
=
self
.
random_seed
)
def
sample
(
self
,
def
sample
(
self
,
tokenizer
:
PreTrainedTokenizerBase
,
tokenizer
:
PreTrainedTokenizerBase
,
num_requests
:
int
,
num_requests
:
int
,
output_len
:
Optional
[
int
]
=
None
,
output_len
:
Optional
[
int
]
=
None
,
enable_multimodal_chat
:
bool
=
False
,
enable_multimodal_chat
:
bool
=
False
,
**
kwargs
)
->
list
:
**
kwargs
,
)
->
list
:
output_len
=
(
output_len
output_len
=
(
output_len
if
output_len
is
not
None
else
self
.
DEFAULT_OUTPUT_LEN
)
if
output_len
is
not
None
else
self
.
DEFAULT_OUTPUT_LEN
)
sampled_requests
=
[]
sampled_requests
=
[]
...
@@ -685,4 +713,5 @@ class VisionArenaDataset(HuggingFaceDataset):
...
@@ -685,4 +713,5 @@ class VisionArenaDataset(HuggingFaceDataset):
expected_output_len
=
output_len
,
expected_output_len
=
output_len
,
multi_modal_data
=
mm_content
,
multi_modal_data
=
mm_content
,
))
))
self
.
maybe_oversample_requests
(
sampled_requests
,
num_requests
)
return
sampled_requests
return
sampled_requests
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment