Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bc3e4306
Unverified
Commit
bc3e4306
authored
Nov 14, 2025
by
Nick Hill
Committed by
GitHub
Nov 14, 2025
Browse files
[BugFix] Fix multi-modal async scheduling race condition (#28706)
Signed-off-by: Nick Hill <nhill@redhat.com>
parent
c36bcfe6
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
10 deletions
+22
-10
vllm/distributed/device_communicators/shm_object_storage.py
vllm/distributed/device_communicators/shm_object_storage.py
+3
-3
vllm/v1/serial_utils.py
vllm/v1/serial_utils.py
+19
-7
No files found.
vllm/distributed/device_communicators/shm_object_storage.py
View file @
bc3e4306
...
...
@@ -342,8 +342,8 @@ class MsgpackSerde(ObjectSerde):
from
vllm.v1.serial_utils
import
MsgpackDecoder
,
MsgpackEncoder
self
.
encoder
=
MsgpackEncoder
()
self
.
tensor_decoder
=
MsgpackDecoder
(
torch
.
Tensor
)
self
.
mm_decoder
=
MsgpackDecoder
(
MultiModalKwargsItem
)
self
.
tensor_decoder
=
MsgpackDecoder
(
torch
.
Tensor
,
share_mem
=
False
)
self
.
mm_decoder
=
MsgpackDecoder
(
MultiModalKwargsItem
,
share_mem
=
False
)
self
.
_mm_kwargs_item_cls
=
MultiModalKwargsItem
def
serialize
(
self
,
value
:
Any
)
->
tuple
[
bytes
|
list
[
bytes
],
int
,
bytes
,
int
]:
...
...
@@ -368,7 +368,7 @@ class MsgpackSerde(ObjectSerde):
# pickle.loads does not read past the end of a pickled object
# within a large buffer, so we can skip storing the metadata size
type_name
,
nbytes
,
len_arr
=
pickle
.
loads
(
data_view
)
serialized_data
=
bytearray
(
data_view
[
-
nbytes
:]
)
serialized_data
=
data_view
[
-
nbytes
:]
if
type_name
==
torch
.
Tensor
.
__name__
:
obj
=
[]
...
...
vllm/v1/serial_utils.py
View file @
bc3e4306
...
...
@@ -31,6 +31,7 @@ from vllm.multimodal.inputs import (
MultiModalSharedField
,
NestedTensors
,
)
from
vllm.utils.platform_utils
import
is_pin_memory_available
from
vllm.v1.engine
import
UtilityResult
from
vllm.v1.utils
import
tensor_data
...
...
@@ -282,7 +283,9 @@ class MsgpackDecoder:
not thread-safe when decoding tensors / numpy arrays.
"""
def
__init__
(
self
,
t
:
Any
|
None
=
None
):
def
__init__
(
self
,
t
:
Any
|
None
=
None
,
share_mem
:
bool
=
True
):
self
.
share_mem
=
share_mem
self
.
pin_tensors
=
is_pin_memory_available
()
args
=
()
if
t
is
None
else
(
t
,)
self
.
decoder
=
msgpack
.
Decoder
(
*
args
,
ext_hook
=
self
.
ext_hook
,
dec_hook
=
self
.
dec_hook
...
...
@@ -347,21 +350,30 @@ class MsgpackDecoder:
# zero-copy decode. We assume the ndarray will not be kept around,
# as it now locks the whole received message buffer in memory.
buffer
=
self
.
aux_buffers
[
data
]
if
isinstance
(
data
,
int
)
else
data
return
np
.
frombuffer
(
buffer
,
dtype
=
dtype
).
reshape
(
shape
)
arr
=
np
.
frombuffer
(
buffer
,
dtype
=
dtype
)
if
not
self
.
share_mem
:
arr
=
arr
.
copy
()
return
arr
.
reshape
(
shape
)
def
_decode_tensor
(
self
,
arr
:
Any
)
->
torch
.
Tensor
:
dtype
,
shape
,
data
=
arr
# Copy from inline representation, to decouple the memory storage
# of the message from the original buffer. And also make Torch
# not complain about a readonly memoryview.
buffer
=
self
.
aux_buffers
[
data
]
if
isinstance
(
data
,
int
)
else
bytearray
(
data
)
is_aux
=
isinstance
(
data
,
int
)
buffer
=
self
.
aux_buffers
[
data
]
if
is_aux
else
data
buffer
=
buffer
if
isinstance
(
buffer
,
memoryview
)
else
memoryview
(
buffer
)
torch_dtype
=
getattr
(
torch
,
dtype
)
assert
isinstance
(
torch_dtype
,
torch
.
dtype
)
if
not
buffer
:
# torch.frombuffer doesn't like empty buffers
if
not
buffer
.
nbytes
:
# torch.frombuffer doesn't like empty buffers
assert
0
in
shape
return
torch
.
empty
(
shape
,
dtype
=
torch_dtype
)
# Create uint8 array
arr
=
torch
.
frombuffer
(
buffer
,
dtype
=
torch
.
uint8
)
# Clone ensures tensor is backed by pytorch-owned memory for safe
# future async CPU->GPU transfer.
# Pin larger tensors for more efficient CPU->GPU transfer.
if
not
is_aux
:
arr
=
arr
.
clone
()
elif
not
self
.
share_mem
:
arr
=
arr
.
pin_memory
()
if
self
.
pin_tensors
else
arr
.
clone
()
# Convert back to proper shape & type
return
arr
.
view
(
torch_dtype
).
view
(
shape
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment