Unverified commit cd9e5b83, authored by Nikhil G and committed by GitHub
Browse files

Fix V1 engine serialization error with Ray distributed executor (#26148)


Signed-off-by: Nikhil Ghosh <nikhil@anyscale.com>
parent 300a59c4
...@@ -16,6 +16,7 @@ from vllm.logger import init_logger ...@@ -16,6 +16,7 @@ from vllm.logger import init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.sequence import ExecuteModelRequest, IntermediateTensors from vllm.sequence import ExecuteModelRequest, IntermediateTensors
from vllm.utils import get_ip from vllm.utils import get_ip
from vllm.v1.outputs import AsyncModelRunnerOutput
from vllm.v1.worker.worker_base import WorkerWrapperBase from vllm.v1.worker.worker_base import WorkerWrapperBase
if TYPE_CHECKING: if TYPE_CHECKING:
...@@ -142,6 +143,11 @@ try: ...@@ -142,6 +143,11 @@ try:
# but may still be finished requests. # but may still be finished requests.
assert not output or not output.req_ids assert not output or not output.req_ids
output = scheduler_output, None output = scheduler_output, None
# Ensure outputs crossing Ray compiled DAG are serializable.
# AsyncModelRunnerOutput holds CUDA events and cannot be
# pickled.
if isinstance(output, AsyncModelRunnerOutput):
output = output.get_output()
return output return output
def override_env_vars(self, vars: Dict[str, str]): def override_env_vars(self, vars: Dict[str, str]):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment