Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
22341b99
Unverified
Commit
22341b99
authored
Aug 15, 2025
by
Staszek Paśko
Committed by
GitHub
Aug 15, 2025
Browse files
Improve multimodal hasher performance for re-used Image prompts (#22825)
Signed-off-by:
Staszek Pasko
<
staszek@gmail.com
>
parent
49252cf5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
0 deletions
+26
-0
tests/multimodal/test_hasher.py
tests/multimodal/test_hasher.py
+20
-0
vllm/multimodal/hasher.py
vllm/multimodal/hasher.py
+6
-0
No files found.
tests/multimodal/test_hasher.py
View file @
22341b99
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
uuid
from
pathlib
import
Path
from
pathlib
import
Path
import
numpy
as
np
import
numpy
as
np
...
@@ -72,3 +73,22 @@ def test_hash_non_contiguous_array():
...
@@ -72,3 +73,22 @@ def test_hash_non_contiguous_array():
hasher
=
MultiModalHasher
hasher
=
MultiModalHasher
# Both should be hashable and produce the same hashes
# Both should be hashable and produce the same hashes
assert
hasher
.
hash_kwargs
(
data
=
arr
)
==
hasher
.
hash_kwargs
(
data
=
arr_c
)
assert
hasher
.
hash_kwargs
(
data
=
arr
)
==
hasher
.
hash_kwargs
(
data
=
arr_c
)
def test_hash_image_exif_id():
    """Check how the hasher treats the EXIF ImageID tag of an image.

    When the ImageID tag holds a ``uuid.UUID``, the image is expected to
    hash the same as the UUID's raw bytes. When the tag holds anything
    else, the image is expected to hash the same as an untagged copy of
    the same picture, i.e. by its pixel data.
    """
    image_id_tag = Image.ExifTags.Base.ImageID

    # Image tagged with a real UUID.
    image_uuid = uuid.uuid4()
    tagged_image = Image.new("1", size=(10, 20))
    tagged_image.getexif()[image_id_tag] = image_uuid

    # Same picture loaded twice: once with a non-UUID ImageID, once untagged.
    mistagged_image = Image.open(ASSETS_DIR / "image1.png")
    mistagged_image.getexif()[image_id_tag] = "Not a UUID"
    untagged_image = Image.open(ASSETS_DIR / "image1.png")

    hasher = MultiModalHasher

    # first image has UUID in ImageID, so it should hash to that UUID
    assert (hasher.hash_kwargs(image=tagged_image) == hasher.hash_kwargs(
        image=image_uuid.bytes))
    # second image has non-UUID in ImageID, so it should hash to the image data
    assert (hasher.hash_kwargs(image=mistagged_image) == hasher.hash_kwargs(
        image=untagged_image))
vllm/multimodal/hasher.py
View file @
22341b99
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pickle
import
pickle
import
uuid
from
collections.abc
import
Iterable
,
Mapping
from
collections.abc
import
Iterable
,
Mapping
from
typing
import
Union
from
typing
import
Union
...
@@ -34,6 +35,11 @@ class MultiModalHasher:
...
@@ -34,6 +35,11 @@ class MultiModalHasher:
return
np
.
array
(
obj
).
tobytes
()
return
np
.
array
(
obj
).
tobytes
()
if
isinstance
(
obj
,
Image
.
Image
):
if
isinstance
(
obj
,
Image
.
Image
):
exif
=
obj
.
getexif
()
if
Image
.
ExifTags
.
Base
.
ImageID
in
exif
and
isinstance
(
exif
[
Image
.
ExifTags
.
Base
.
ImageID
],
uuid
.
UUID
):
# If the image has exif ImageID tag, use that
return
exif
[
Image
.
ExifTags
.
Base
.
ImageID
].
bytes
return
cls
.
item_to_bytes
(
return
cls
.
item_to_bytes
(
"image"
,
np
.
asarray
(
convert_image_mode
(
obj
,
"RGBA"
)))
"image"
,
np
.
asarray
(
convert_image_mode
(
obj
,
"RGBA"
)))
if
isinstance
(
obj
,
torch
.
Tensor
):
if
isinstance
(
obj
,
torch
.
Tensor
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment