Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d82b9487
Commit
d82b9487
authored
Mar 18, 2025
by
Simon Mo
Committed by
simon-mo
Mar 18, 2025
Browse files
[Bugfix] Register serializers for V0 MQ Engine (#15009)
Signed-off-by:
simon-mo
<
simon.mo@hey.com
>
parent
be13281d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
3 deletions
+13
-3
vllm/engine/multiprocessing/engine.py
vllm/engine/multiprocessing/engine.py
+8
-3
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+5
-0
No files found.
vllm/engine/multiprocessing/engine.py
View file @
d82b9487
...
@@ -29,6 +29,8 @@ from vllm.engine.multiprocessing import (ENGINE_DEAD_ERROR, IPC_DATA_EXT,
...
@@ -29,6 +29,8 @@ from vllm.engine.multiprocessing import (ENGINE_DEAD_ERROR, IPC_DATA_EXT,
# yapf: enable
# yapf: enable
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.transformers_utils.config
import
(
maybe_register_config_serialize_by_value
)
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.worker.model_runner_base
import
InputProcessingError
from
vllm.worker.model_runner_base
import
InputProcessingError
...
@@ -42,12 +44,12 @@ class MQLLMEngine:
...
@@ -42,12 +44,12 @@ class MQLLMEngine:
"""A multiprocessing wrapper for :class:`LLMEngine`.
"""A multiprocessing wrapper for :class:`LLMEngine`.
This class is used to wrap the :class:`LLMEngine` class to enable use
This class is used to wrap the :class:`LLMEngine` class to enable use
in a concurrent manner. It runs a background loop and uses zeromq to
in a concurrent manner. It runs a background loop and uses zeromq to
receive new requests and stream outputs incrementally via ipc.
receive new requests and stream outputs incrementally via ipc.
The :class:`LLMEngine` generate or encode process is kicked off when a new
The :class:`LLMEngine` generate or encode process is kicked off when a new
RPCProcessRequest is received by the input_socket.
RPCProcessRequest is received by the input_socket.
The self.engine_loop checks the input_socket for new requests,
The self.engine_loop checks the input_socket for new requests,
adds them to the LLMEngine if there are any, calls the internal
adds them to the LLMEngine if there are any, calls the internal
:class:`LLMEngine.step()`, and sends the RequestOutputs back over
:class:`LLMEngine.step()`, and sends the RequestOutputs back over
...
@@ -428,6 +430,9 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
...
@@ -428,6 +430,9 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
ipc_path
:
str
,
disable_log_stats
:
bool
,
ipc_path
:
str
,
disable_log_stats
:
bool
,
disable_log_requests
:
bool
,
engine_alive
):
disable_log_requests
:
bool
,
engine_alive
):
try
:
try
:
# Ensure we can serialize transformer config before spawning
maybe_register_config_serialize_by_value
()
engine
=
MQLLMEngine
.
from_vllm_config
(
engine
=
MQLLMEngine
.
from_vllm_config
(
vllm_config
=
vllm_config
,
vllm_config
=
vllm_config
,
usage_context
=
usage_context
,
usage_context
=
usage_context
,
...
...
vllm/entrypoints/openai/api_server.py
View file @
d82b9487
...
@@ -82,6 +82,8 @@ from vllm.entrypoints.openai.serving_transcription import (
...
@@ -82,6 +82,8 @@ from vllm.entrypoints.openai.serving_transcription import (
from
vllm.entrypoints.openai.tool_parsers
import
ToolParserManager
from
vllm.entrypoints.openai.tool_parsers
import
ToolParserManager
from
vllm.entrypoints.utils
import
load_aware_call
,
with_cancellation
from
vllm.entrypoints.utils
import
load_aware_call
,
with_cancellation
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.transformers_utils.config
import
(
maybe_register_config_serialize_by_value
)
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
(
FlexibleArgumentParser
,
get_open_zmq_ipc_path
,
from
vllm.utils
import
(
FlexibleArgumentParser
,
get_open_zmq_ipc_path
,
is_valid_ipv6_address
,
set_ulimit
)
is_valid_ipv6_address
,
set_ulimit
)
...
@@ -221,6 +223,9 @@ async def build_async_engine_client_from_engine_args(
...
@@ -221,6 +223,9 @@ async def build_async_engine_client_from_engine_args(
# so we need to spawn a new process
# so we need to spawn a new process
context
=
multiprocessing
.
get_context
(
"spawn"
)
context
=
multiprocessing
.
get_context
(
"spawn"
)
# Ensure we can serialize transformer config before spawning
maybe_register_config_serialize_by_value
()
# The Process can raise an exception during startup, which may
# The Process can raise an exception during startup, which may
# not actually result in an exitcode being reported. As a result
# not actually result in an exitcode being reported. As a result
# we use a shared variable to communicate the information.
# we use a shared variable to communicate the information.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment