Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad6c655d
Unverified
Commit
ad6c655d
authored
Aug 06, 2025
by
Lionel Villard
Committed by
GitHub
Aug 06, 2025
Browse files
preload heavy modules when mp method is forkserver (#22214)
Signed-off-by: Lionel Villard <villard@us.ibm.com>
parent
14bcf93a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 13 additions and 1 deletion
+13
-1
vllm/benchmarks/latency.py
vllm/benchmarks/latency.py
+3
-1
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+10
-0
No files found.
vllm/benchmarks/latency.py
View file @
ad6c655d
...
...
@@ -13,7 +13,6 @@ import numpy as np
from
tqdm
import
tqdm
import
vllm.envs
as
envs
from
vllm
import
LLM
,
SamplingParams
from
vllm.benchmarks.lib.utils
import
(
convert_to_pytorch_benchmark_format
,
write_to_json
)
from
vllm.engine.arg_utils
import
EngineArgs
...
...
@@ -85,6 +84,9 @@ def main(args: argparse.Namespace):
"Please set it to a valid path to use torch profiler."
)
engine_args
=
EngineArgs
.
from_cli_args
(
args
)
# Lazy import to avoid importing LLM when the bench command is not selected.
from
vllm
import
LLM
,
SamplingParams
# NOTE(woosuk): If the request cannot be processed in a single batch,
# the engine will automatically process the request in multiple batches.
llm
=
LLM
(
**
dataclasses
.
asdict
(
engine_args
))
...
...
vllm/entrypoints/openai/api_server.py
View file @
ad6c655d
...
...
@@ -8,6 +8,7 @@ import importlib
import
inspect
import
json
import
multiprocessing
import
multiprocessing.forkserver
as
forkserver
import
os
import
signal
import
socket
...
...
@@ -155,6 +156,15 @@ async def build_async_engine_client(
client_config
:
Optional
[
dict
[
str
,
Any
]]
=
None
,
)
->
AsyncIterator
[
EngineClient
]:
if
os
.
getenv
(
"VLLM_WORKER_MULTIPROC_METHOD"
)
==
"forkserver"
:
# The executor is expected to be mp.
# Pre-import heavy modules in the forkserver process
logger
.
debug
(
"Setup forkserver with pre-imports"
)
multiprocessing
.
set_start_method
(
'forkserver'
)
multiprocessing
.
set_forkserver_preload
([
"vllm.v1.engine.async_llm"
])
forkserver
.
ensure_running
()
logger
.
debug
(
"Forkserver setup complete!"
)
# Context manager to handle engine_client lifecycle
# Ensures everything is shutdown and cleaned up on error/exit
engine_args
=
AsyncEngineArgs
.
from_cli_args
(
args
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment