Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
99404f53
Unverified
Commit
99404f53
authored
May 02, 2025
by
Cyrus Leung
Committed by
GitHub
May 02, 2025
Browse files
[Security] Fix image hash collision (#17378)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
785d75a0
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
83 additions
and
10 deletions
+83
-10
tests/multimodal/assets/image1.png
tests/multimodal/assets/image1.png
+0
-0
tests/multimodal/assets/image2.png
tests/multimodal/assets/image2.png
+0
-0
tests/multimodal/test_hasher.py
tests/multimodal/test_hasher.py
+61
-0
vllm/multimodal/hasher.py
vllm/multimodal/hasher.py
+22
-10
No files found.
tests/multimodal/assets/image1.png
0 → 100644
View file @
99404f53
1.79 KB
tests/multimodal/assets/image2.png
0 → 100644
View file @
99404f53
1.79 KB
tests/multimodal/test_hasher.py
0 → 100644
View file @
99404f53
# SPDX-License-Identifier: Apache-2.0
from
pathlib
import
Path
import
numpy
as
np
import
pytest
import
torch
from
PIL
import
Image
,
ImageDraw
from
vllm.multimodal.hasher
import
MultiModalHasher
# Fixture directory that sits next to this test module; fail fast at import
# time if it is missing.
ASSETS_DIR = Path(__file__).parent.joinpath("assets")
assert ASSETS_DIR.exists()
# NOTE: Images that are the same visually are allowed to have the same hash
@pytest.mark.parametrize("mode_pair", [("1", "L"), ("RGBA", "CMYK")])
def test_hash_collision_image_mode(mode_pair):
    """Images of equal size and color value but different modes must not collide."""
    first_mode, second_mode = mode_pair

    # Build one 10x10 image per mode with the same fill value, then hash each.
    first_image = Image.new(first_mode, size=(10, 10), color=1)
    second_image = Image.new(second_mode, size=(10, 10), color=1)
    first_hash = MultiModalHasher.hash_kwargs(image=first_image)
    second_hash = MultiModalHasher.hash_kwargs(image=second_image)

    assert first_hash != second_hash
def test_hash_collision_image_palette():
    """The two PNG fixtures must produce different hashes."""
    # The two fixtures differ only in Image.palette._palette
    fixtures = [
        Image.open(ASSETS_DIR / file_name)
        for file_name in ("image1.png", "image2.png")
    ]
    first_hash, second_hash = (
        MultiModalHasher.hash_kwargs(image=fixture) for fixture in fixtures
    )

    assert first_hash != second_hash
def test_hash_collision_image_transpose():
    """Bilevel images with swapped width/height must not collide."""

    def _canvas_with_line(size, endpoints):
        # Mode "1" image of the given (width, height) with a single line drawn.
        canvas = Image.new("1", size=size)
        ImageDraw.Draw(canvas).line(endpoints)
        return canvas

    # 10x20 image with a line along the top edge ...
    tall = _canvas_with_line((10, 20), [(0, 0), (10, 0)])
    # ... versus a 20x10 image with a line along the left edge.
    wide = _canvas_with_line((20, 10), [(0, 0), (0, 10)])

    tall_hash = MultiModalHasher.hash_kwargs(image=tall)
    wide_hash = MultiModalHasher.hash_kwargs(image=wide)
    assert tall_hash != wide_hash
def test_hash_collision_tensor_shape():
    """All-zero tensors with the same element count but different shapes must not collide."""
    # Flattened, both tensors hold identical data; only the shape tells them
    # apart, so the hash has to take the shape into account.
    shapes = ((5, 10, 20, 3), (10, 20, 5, 3))
    first_hash, second_hash = (
        MultiModalHasher.hash_kwargs(data=torch.zeros(shape))
        for shape in shapes
    )

    assert first_hash != second_hash
def test_hash_collision_array_shape():
    """All-zero NumPy arrays with the same element count but different shapes must not collide."""
    # Flattened, both arrays hold identical data; only the shape tells them
    # apart, so the hash has to take the shape into account.
    shapes = ((5, 10, 20, 3), (10, 20, 5, 3))
    first_hash, second_hash = (
        MultiModalHasher.hash_kwargs(data=np.zeros(shape))
        for shape in shapes
    )

    assert first_hash != second_hash
vllm/multimodal/hasher.py
View file @
99404f53
...
...
@@ -31,16 +31,20 @@ class MultiModalHasher:
return
obj
.
encode
(
"utf-8"
)
if
isinstance
(
obj
,
bytes
):
return
obj
if
isinstance
(
obj
,
Image
.
Image
):
return
obj
.
tobytes
()
if
isinstance
(
obj
,
(
int
,
float
)
):
return
np
.
array
(
obj
)
.
tobytes
()
# Convertible to NumPy arrays
if
isinstance
(
obj
,
Image
.
Image
):
return
cls
.
item_to_bytes
(
"image"
,
np
.
array
(
obj
.
convert
(
"RGBA"
)))
if
isinstance
(
obj
,
torch
.
Tensor
):
obj
=
obj
.
numpy
()
if
isinstance
(
obj
,
(
int
,
float
)):
obj
=
np
.
array
(
obj
)
return
cls
.
item_to_bytes
(
"tensor"
,
obj
.
numpy
())
if
isinstance
(
obj
,
np
.
ndarray
):
return
obj
.
tobytes
()
return
cls
.
item_to_bytes
(
"ndarray"
,
{
"dtype"
:
obj
.
dtype
.
str
,
"shape"
:
obj
.
shape
,
"data"
:
obj
.
data
.
tobytes
(),
})
logger
.
warning
(
"No serialization method found for %s. "
...
...
@@ -53,14 +57,22 @@ class MultiModalHasher:
cls
,
key
:
str
,
obj
:
object
,
)
->
bytes
:
return
b
''
.
join
(
kb
+
vb
for
kb
,
vb
in
cls
.
iter_item_to_bytes
(
key
,
obj
))
@
classmethod
def
iter_item_to_bytes
(
cls
,
key
:
str
,
obj
:
object
,
)
->
Iterable
[
tuple
[
bytes
,
bytes
]]:
# Recursive cases
if
isinstance
(
obj
,
(
list
,
tuple
)):
for
i
,
elem
in
enumerate
(
obj
):
yield
from
cls
.
item_to_bytes
(
f
"
{
key
}
.
{
i
}
"
,
elem
)
yield
from
cls
.
iter_
item_to_bytes
(
f
"
{
key
}
.
{
i
}
"
,
elem
)
elif
isinstance
(
obj
,
dict
):
for
k
,
v
in
obj
.
items
():
yield
from
cls
.
item_to_bytes
(
f
"
{
key
}
.
{
k
}
"
,
v
)
yield
from
cls
.
iter_
item_to_bytes
(
f
"
{
key
}
.
{
k
}
"
,
v
)
else
:
key_bytes
=
cls
.
serialize_item
(
key
)
value_bytes
=
cls
.
serialize_item
(
obj
)
...
...
@@ -71,7 +83,7 @@ class MultiModalHasher:
hasher
=
blake3
()
for
k
,
v
in
kwargs
.
items
():
for
k_bytes
,
v_bytes
in
cls
.
item_to_bytes
(
k
,
v
):
for
k_bytes
,
v_bytes
in
cls
.
iter_
item_to_bytes
(
k
,
v
):
hasher
.
update
(
k_bytes
)
hasher
.
update
(
v_bytes
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment