Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5a4c96db
Unverified
Commit
5a4c96db
authored
Feb 24, 2026
by
Qi Wang
Committed by
GitHub
Feb 24, 2026
Browse files
test: introduce multimodal benchmark toolkit (#6330)
parent
fcdf6610
Changes
11
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
591 additions
and
0 deletions
+591
-0
benchmarks/multimodal/generate_aiperf_images/README.md
benchmarks/multimodal/generate_aiperf_images/README.md
+18
-0
benchmarks/multimodal/generate_aiperf_images/args.py
benchmarks/multimodal/generate_aiperf_images/args.py
+27
-0
benchmarks/multimodal/generate_aiperf_images/main.py
benchmarks/multimodal/generate_aiperf_images/main.py
+30
-0
benchmarks/multimodal/jsonl/.gitignore
benchmarks/multimodal/jsonl/.gitignore
+1
-0
benchmarks/multimodal/jsonl/README.md
benchmarks/multimodal/jsonl/README.md
+60
-0
benchmarks/multimodal/jsonl/annotations/image_info_test-dev2017.json
...multimodal/jsonl/annotations/image_info_test-dev2017.json
+1
-0
benchmarks/multimodal/jsonl/annotations/image_info_test2017.json
...rks/multimodal/jsonl/annotations/image_info_test2017.json
+1
-0
benchmarks/multimodal/jsonl/args.py
benchmarks/multimodal/jsonl/args.py
+81
-0
benchmarks/multimodal/jsonl/generate_images.py
benchmarks/multimodal/jsonl/generate_images.py
+78
-0
benchmarks/multimodal/jsonl/generate_input_text.py
benchmarks/multimodal/jsonl/generate_input_text.py
+223
-0
benchmarks/multimodal/jsonl/main.py
benchmarks/multimodal/jsonl/main.py
+71
-0
No files found.
benchmarks/multimodal/generate_aiperf_images/README.md
0 → 100644
View file @
5a4c96db
# Generate aiperf Source Images
aiperf's built-in image generator ships with very few source images. When
benchmarking with `--image-mode base64`, aiperf picks from its
`assets/source_images/` directory — a small set means every request sends
nearly identical images, which doesn't stress the multimodal pipeline
realistically.

This script populates that directory with random-noise PNGs (200 by default,
configurable with `--images-pool`) so aiperf has a larger pool to sample from.
## Usage
```bash
python main.py
```
Images are written directly into aiperf's installed `source_images/` directory.
benchmarks/multimodal/generate_aiperf_images/args.py
0 → 100644
View file @
5a4c96db
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""CLI argument parsing for aiperf image generation."""
import
argparse
def parse_args() -> argparse.Namespace:
    """Parse the command line of the aiperf source-image generator.

    Returns:
        Namespace with ``images_pool`` (int, default 200) and ``image_size``
        (a two-element ``[width, height]`` list, default ``[512, 512]``).
    """
    cli = argparse.ArgumentParser(
        description="Generate random-noise PNGs into aiperf's source_images directory.",
    )

    # One (flag, keyword-arguments) entry per supported option.
    option_table = (
        (
            "--images-pool",
            {
                "type": int,
                "default": 200,
                "help": "Number of unique images to generate (default: 200)",
            },
        ),
        (
            "--image-size",
            {
                "type": int,
                "nargs": 2,
                "default": [512, 512],
                "metavar": ("WIDTH", "HEIGHT"),
                "help": "Size of generated PNG images in pixels (default: 512 512)",
            },
        ),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
benchmarks/multimodal/generate_aiperf_images/main.py
0 → 100644
View file @
5a4c96db
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from
pathlib
import
Path
import
aiperf.dataset.generator.image
as
_img_mod
import
numpy
as
np
from
args
import
parse_args
from
PIL
import
Image
# Output directory: "assets/source_images" next to the installed
# aiperf.dataset.generator.image module file.
TARGET_DIR = Path(_img_mod.__file__).parent.joinpath("assets", "source_images")
def main() -> None:
    """Write the requested number of random-noise PNGs into TARGET_DIR.

    The count and the image dimensions come from the command line. The
    generator is seeded with a fixed value (42), so repeated runs with the
    same arguments produce the same pixel data. A progress line is printed
    after every 100th image, followed by a final summary.
    """
    cli = parse_args()
    total: int = cli.images_pool
    width, height = cli.image_size

    TARGET_DIR.mkdir(parents=True, exist_ok=True)

    noise = np.random.default_rng(42)
    # Array layout is (rows, columns, channels), i.e. height comes first.
    shape = (height, width, 3)
    for done in range(1, total + 1):
        pixels = noise.integers(0, 256, shape, dtype=np.uint8)
        destination = TARGET_DIR / f"noise_{done - 1:04d}.png"
        Image.fromarray(pixels).save(destination)
        if done % 100 == 0:
            print(f"{done}/{total}")

    print(f"\n{total} unique {width}x{height} images saved to {TARGET_DIR}")


if __name__ == "__main__":
    main()
benchmarks/multimodal/jsonl/.gitignore
0 → 100644
View file @
5a4c96db
*.jsonl
benchmarks/multimodal/jsonl/README.md
0 → 100644
View file @
5a4c96db
# Multimodal JSONL Request Generator
Generates `.jsonl` benchmark files for [aiperf](https://github.com/NVIDIA/aiperf)
with single-turn multimodal requests (text + images).
## Key concept: image pool reuse
Each request samples images from a fixed pool. A smaller pool relative to total
image slots produces more cross-request image reuse — useful for benchmarking
embedding cache hit rates.
For example, 500 requests x 3 images each = 1500 image slots. With
`--images-pool 200`, many requests will share the same images.
## Image modes
| Mode | `--image-mode` | What goes in the JSONL | Who fetches the image |
|------|---------------|------------------------|----------------------|
| base64 (default) | `base64` | Absolute file paths to local PNGs | aiperf reads and base64-encodes before sending |
| HTTP | `http` | COCO test2017 URLs | The LLM server downloads images itself |
For `http` mode, download COCO annotations first:

```bash
mkdir -p annotations && cd annotations
wget http://images.cocodataset.org/annotations/image_info_test2017.zip
unzip image_info_test2017.zip
```
## Usage
```bash
# Defaults: 500 requests, 3 images each, all unique, base64 mode
python main.py

# HTTP mode with COCO URLs
python main.py --image-mode http

# Control reuse: 200 requests, pool of 100 unique images
python main.py -n 200 --images-pool 100

# More images per request
python main.py -n 100 --images-per-request 20 --images-pool 500
```
Output filename encodes the parameters, e.g. `500req_3img_200pool_300word_http.jsonl`.
## Running with aiperf
```bash
aiperf profile \
  --model Qwen/Qwen3-VL-30B-A3B-Instruct-FP8 \
  --input-file 500req_3img_200pool_300word_http.jsonl \
  --custom-dataset-type single_turn \
  --shared-system-prompt-length 1000 \
  --extra-inputs "max_tokens:500" \
  --extra-inputs "min_tokens:500" \
  --extra-inputs "ignore_eos:true"
```
Note: the JSONL contains actual content (text + image references), not token
counts. Do not pass `--isl` to aiperf — it only applies to synthetic data
generation.
benchmarks/multimodal/jsonl/annotations/image_info_test-dev2017.json
0 → 100644
View file @
5a4c96db
This diff is collapsed.
Click to expand it.
benchmarks/multimodal/jsonl/annotations/image_info_test2017.json
0 → 100644
View file @
5a4c96db
This diff is collapsed.
Click to expand it.
benchmarks/multimodal/jsonl/args.py
0 → 100644
View file @
5a4c96db
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""CLI argument parsing for request generation scripts."""
import
argparse
from
pathlib
import
Path
# Defaults shared by the option definitions and their help texts below.
DEFAULT_IMAGES_PER_REQUEST = 3
USER_TEXT_TOKENS = 300
COCO_ANNOTATIONS = Path(__file__).parent / "annotations" / "image_info_test2017.json"


def parse_args(description: str = "") -> argparse.Namespace:
    """Parse the command line of the JSONL request generator.

    Args:
        description: Text shown at the top of ``--help``. It is printed
            verbatim because the parser uses ``RawDescriptionHelpFormatter``.

    Returns:
        Namespace with ``num_requests``, ``images_pool`` (``None`` when not
        given), ``images_per_request``, ``output`` (``None`` when not given),
        ``image_dir``, ``user_text_tokens``, ``image_mode``,
        ``coco_annotations`` and ``image_size`` (``[width, height]``).
    """
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-n",
        "--num-requests",
        type=int,
        default=500,
        help="Number of requests to generate (default: 500)",
    )
    parser.add_argument(
        "--images-pool",
        type=int,
        default=None,
        help="Number of unique images in the pool. Each request samples from this pool, "
        "so a smaller pool means more cross-request reuse. "
        "Default: num_requests * images_per_request (all unique, no reuse).",
    )
    parser.add_argument(
        "--images-per-request",
        type=int,
        default=DEFAULT_IMAGES_PER_REQUEST,
        help=f"Number of images per request (default: {DEFAULT_IMAGES_PER_REQUEST})",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=None,
        # Plain (non-f) string on purpose: the braces are placeholders shown to the user.
        help="Output .jsonl path (default: {n}req_{img}img_{pool}pool_{word}word_{mode}.jsonl, "
        "e.g. 100req_20img_1000pool_4000word_base64.jsonl)",
    )
    parser.add_argument(
        "--image-dir",
        type=Path,
        default=Path("/tmp/bench_images"),
        help="Directory to save generated PNG images (default: /tmp/bench_images)",
    )
    parser.add_argument(
        "--user-text-tokens",
        # The help text has always advertised --isl as an alias; register it so
        # the claim is true. dest is pinned so the attribute name cannot drift.
        "--isl",
        dest="user_text_tokens",
        type=int,
        default=USER_TEXT_TOKENS,
        help=f"Target user text tokens per request (default: {USER_TEXT_TOKENS}). --isl is an alias.",
    )
    parser.add_argument(
        "--image-mode",
        choices=["base64", "http"],
        default="base64",
        help="Image loading mode: 'base64' generates local PNGs and puts file paths in "
        "the JSONL so aiperf reads and base64-encodes them before sending (default); "
        "'http' puts COCO HTTP URLs in the JSONL so the LLM server downloads images itself",
    )
    parser.add_argument(
        "--coco-annotations",
        type=Path,
        default=COCO_ANNOTATIONS,
        help=f"Path to COCO image_info JSON for --image-mode http (default: {COCO_ANNOTATIONS})",
    )
    parser.add_argument(
        "--image-size",
        type=int,
        nargs=2,
        default=[512, 512],
        metavar=("WIDTH", "HEIGHT"),
        help="Size of generated PNG images in pixels (default: 512 512)",
    )
    return parser.parse_args()
benchmarks/multimodal/jsonl/generate_images.py
0 → 100644
View file @
5a4c96db
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Utilities for generating and sampling image pools."""
import
json
import
random
from
pathlib
import
Path
import
numpy
as
np
from
PIL
import
Image
def generate_image_pool_base64(
    np_rng: np.random.Generator,
    pool_size: int,
    image_dir: Path,
    image_size: tuple[int, int] = (512, 512),
) -> list[str]:
    """Generate pool_size random-noise PNG files and return their paths.

    Args:
        np_rng: NumPy generator used to draw the pixel values.
        pool_size: Number of images to create.
        image_dir: Directory the PNGs are written to; created if missing.
        image_size: ``(width, height)`` of each image in pixels.

    Returns:
        Resolved (absolute) path strings of the written files, in creation
        order (``img_0000.png``, ``img_0001.png``, ...).
    """
    width, height = image_size
    image_dir.mkdir(parents=True, exist_ok=True)

    pool: list[str] = []
    for idx in range(pool_size):
        path = image_dir / f"img_{idx:04d}.png"
        # Image arrays are (rows, columns, channels) = (height, width, 3).
        # Unpacking image_size directly into the shape would transpose
        # non-square images, because image_size is (width, height).
        pixels = np_rng.integers(0, 256, (height, width, 3), dtype=np.uint8)
        Image.fromarray(pixels).save(path)
        pool.append(str(path.resolve()))

    print(f"{pool_size} unique {width}x{height} images saved to {image_dir}")
    return pool
def generate_image_pool_http(
    py_rng: random.Random,
    pool_size: int,
    coco_annotations: Path,
) -> list[str]:
    """Pick pool_size image URLs at random from a COCO image_info file.

    Args:
        py_rng: Random source; it shuffles the full URL list before the
            first pool_size entries are taken.
        pool_size: Number of URLs to return.
        coco_annotations: Path to a JSON file with an ``"images"`` list
            whose entries each carry a ``"coco_url"`` key.

    Returns:
        The selected URLs, in shuffled order.

    Raises:
        ValueError: If pool_size is negative.
        RuntimeError: If pool_size exceeds the number of URLs in the file.
    """
    # A negative size would otherwise be taken as a "drop the last k" slice
    # and silently return almost the whole dataset.
    if pool_size < 0:
        raise ValueError(f"--images-pool ({pool_size}) must not be negative.")

    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(coco_annotations, encoding="utf-8") as f:
        data = json.load(f)
    all_urls = [img["coco_url"] for img in data["images"]]

    if pool_size > len(all_urls):
        raise RuntimeError(
            f"--images-pool ({pool_size}) exceeds available COCO images ({len(all_urls)}). "
            f"Reduce --images-pool."
        )

    py_rng.shuffle(all_urls)
    pool = all_urls[:pool_size]
    print(
        f"{pool_size} URLs sampled from {coco_annotations.name} ({len(all_urls)} available)"
    )
    return pool
def sample_slots(
    py_rng: random.Random,
    pool: list[str],
    num_requests: int,
    images_per_request: int,
) -> list[str]:
    """Sample image slots from a fixed pool, no duplicates within each request.

    Each request draws images_per_request distinct entries from pool. The
    draws of different requests are independent, so an image can appear in
    several requests even when the pool is as large as the total slot count.

    Args:
        py_rng: Random source used for every per-request draw.
        pool: Image references (paths or URLs) to sample from.
        num_requests: Number of requests to fill.
        images_per_request: Number of distinct images per request.

    Returns:
        Flat list of references; request ``i`` owns the slice
        ``[i * images_per_request : (i + 1) * images_per_request]``.

    Raises:
        ValueError: If the pool holds fewer than images_per_request entries.
    """
    # Explicit raise rather than assert, so the check survives ``python -O``.
    if len(pool) < images_per_request:
        raise ValueError(
            f"images-pool ({len(pool)}) must be >= images-per-request ({images_per_request})"
        )

    slot_refs: list[str] = []
    for _ in range(num_requests):
        slot_refs.extend(py_rng.sample(pool, images_per_request))

    total_slots = len(slot_refs)
    num_unique = len(set(slot_refs))
    duplicates = total_slots - num_unique
    # With zero slots there is nothing to reuse; avoid dividing by zero.
    reuse = duplicates / total_slots if total_slots else 0.0
    print(
        f"Generated {total_slots} image slots from pool of {len(pool)}: "
        f"{num_unique} unique in use, "
        f"{duplicates} duplicate references "
        f"({reuse:.1%} reuse)"
    )
    return slot_refs
benchmarks/multimodal/jsonl/generate_input_text.py
0 → 100644
View file @
5a4c96db
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Utilities for generating filler text that tokenizes predictably."""
import
random
# Common English words that each tokenize to a single BPE token on most LLMs.
# Kept as one whitespace-separated literal; .split() turns it into the list.
ENGLISH_VOCAB = """
    the be to of and a in that have I
    it for not on with he as you do at
    this but his by from they we say her she
    or an will my one all would there their what
    so up out if about who get which go me
    when make can like time no just him know take
    people into year your good some could them see other
    than then now look only come its over think also
    back after use two how our work first well way
    even new want because any these give day most us
    great world still own find here thing many long hand
    high keep place start might old home big end while
    last turn ask need too feel seem call head put
    lot run every play small set live try tell few
    part change help show house both side point such name
    each right move must real left same much open near
    line build power water city tree earth plan food dark
    cold sure car face nice state fact night hard read
    idea stand class body book word best done case four
    fire front rest game war air eye true top area
    boy girl color oil song note low bed
""".split()
def generate_filler(rng: random.Random, num_tokens: int) -> str:
    """Build filler text made of num_tokens randomly chosen vocabulary words.

    Words are drawn one at a time from ENGLISH_VOCAB with rng (repeats are
    possible) and joined with single spaces. The token count is approximate:
    it equals the word count only if every word maps to exactly one token.
    """
    words = [rng.choice(ENGLISH_VOCAB) for _ in range(num_tokens)]
    return " ".join(words)
benchmarks/multimodal/jsonl/main.py
0 → 100644
View file @
5a4c96db
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Generate a .jsonl benchmark file for aiperf (single-turn, text + images).
Images are drawn from a fixed pool; a smaller pool produces more cross-request
reuse. Supports base64 (local PNGs) and http (COCO URLs) image modes.
Usage:
python main.py
python main.py --image-mode http
python main.py -n 200 --images-pool 100
"""
import
json
import
random
import
time
from
pathlib
import
Path
import
numpy
as
np
from
args
import
parse_args
from
generate_images
import
(
generate_image_pool_base64
,
generate_image_pool_http
,
sample_slots
,
)
from
generate_input_text
import
generate_filler
# Seed derived from the wall clock at import time: milliseconds since the
# epoch, reduced modulo 2**32. It differs from run to run.
SEED = int(time.time() * 1000) % (1 << 32)
def main() -> None:
    """Generate the image pool, sample per-request images and write the JSONL.

    Steps, in order: parse the CLI, build the pool (COCO URLs in ``http``
    mode, freshly generated local PNGs otherwise), assign images to request
    slots, then write one JSON object per request with the keys ``"text"``
    and ``"images"``.
    """
    cli = parse_args(__doc__)
    n_requests: int = cli.num_requests
    per_request: int = cli.images_per_request
    # A missing (or zero) --images-pool falls back to one pool image per slot.
    pool_size: int = cli.images_pool or (n_requests * per_request)

    # Both generators start from the same module-level seed.
    np_rng = np.random.default_rng(SEED)
    py_rng = random.Random(SEED)

    if cli.image_mode == "http":
        pool = generate_image_pool_http(py_rng, pool_size, cli.coco_annotations)
    else:
        pool = generate_image_pool_base64(
            np_rng, pool_size, cli.image_dir, tuple(cli.image_size)
        )

    slot_refs = sample_slots(py_rng, pool, n_requests, per_request)

    destination = cli.output
    if destination is None:
        # The "pool" field of the default name is the number of distinct
        # images actually referenced, not the requested pool size.
        distinct_in_use = len(set(slot_refs))
        default_name = (
            f"{n_requests}req_{per_request}img_{distinct_in_use}pool_"
            f"{cli.user_text_tokens}word_{cli.image_mode}.jsonl"
        )
        destination = Path(__file__).parent / default_name

    with open(destination, "w") as out:
        for index in range(n_requests):
            user_text = generate_filler(py_rng, cli.user_text_tokens)
            first = index * per_request
            record = {
                "text": user_text,
                "images": slot_refs[first : first + per_request],
            }
            # Compact separators keep every request on one short line.
            out.write(json.dumps(record, separators=(",", ":")) + "\n")

    print(f"Wrote {n_requests} requests to {destination}")


if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment