Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9040151f
Unverified
Commit
9040151f
authored
Mar 19, 2026
by
Flora Feng
Committed by
GitHub
Mar 20, 2026
Browse files
[V0 Deprecation] Deprecate --disable-frontend-multiprocessing (#37612)
Signed-off-by:
sfeng33
<
4florafeng@gmail.com
>
parent
8fbe3f30
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
8 additions
and
73 deletions
+8
-73
tests/distributed/test_pipeline_parallel.py
tests/distributed/test_pipeline_parallel.py
+0
-3
tests/entrypoints/instrumentator/test_basic.py
tests/entrypoints/instrumentator/test_basic.py
+3
-31
tests/entrypoints/instrumentator/test_metrics.py
tests/entrypoints/instrumentator/test_metrics.py
+0
-1
tests/entrypoints/openai/completion/test_completion_with_prompt_embeds.py
...s/openai/completion/test_completion_with_prompt_embeds.py
+2
-5
tests/entrypoints/openai/completion/test_shutdown.py
tests/entrypoints/openai/completion/test_shutdown.py
+0
-1
tests/v1/entrypoints/openai/test_completion.py
tests/v1/entrypoints/openai/test_completion.py
+3
-10
vllm/benchmarks/throughput.py
vllm/benchmarks/throughput.py
+0
-9
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+0
-9
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+0
-3
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/run_batch.py
+0
-1
No files found.
tests/distributed/test_pipeline_parallel.py
View file @
9040151f
...
...
@@ -319,9 +319,6 @@ def _compare_tp(
pp_env
=
{
"VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL"
:
"1"
,
}
# Temporary. Currently when zeromq + SPMD is used, it does not properly
# terminate because of a Ray Compiled Graph issue.
common_args
.
append
(
"--disable-frontend-multiprocessing"
)
elif
distributed_backend
==
"mp"
:
pp_env
=
None
else
:
...
...
tests/entrypoints/instrumentator/test_basic.py
View file @
9040151f
...
...
@@ -28,7 +28,7 @@ def server_args(request: pytest.FixtureRequest) -> list[str]:
>>> @pytest.mark.parametrize(
>>> "server_args",
>>> [
>>> ["--
disable-frontend-multiprocessing
"],
>>> ["--
max-model-len", "10100
"],
>>> [
>>> "--model=NousResearch/Hermes-3-Llama-3.1-70B",
>>> "--enable-auto-tool-choice",
...
...
@@ -40,7 +40,7 @@ def server_args(request: pytest.FixtureRequest) -> list[str]:
>>> ...
This will run `test_foo` twice with servers with:
- `--
disable-frontend-multiprocessing
`
- `--
max-model-len 10100
`
- `--model=NousResearch/Hermes-3-Llama-3.1-70B --enable-auto-tool-choice`.
"""
...
...
@@ -79,17 +79,6 @@ async def client(server):
yield
async_client
@
pytest
.
mark
.
parametrize
(
"server_args"
,
[
pytest
.
param
([],
id
=
"default-frontend-multiprocessing"
),
pytest
.
param
(
[
"--disable-frontend-multiprocessing"
],
id
=
"disable-frontend-multiprocessing"
,
),
],
indirect
=
True
,
)
@
pytest
.
mark
.
asyncio
async
def
test_show_version
(
server
:
RemoteOpenAIServer
):
response
=
requests
.
get
(
server
.
url_for
(
"version"
))
...
...
@@ -98,17 +87,6 @@ async def test_show_version(server: RemoteOpenAIServer):
assert
response
.
json
()
==
{
"version"
:
VLLM_VERSION
}
@
pytest
.
mark
.
parametrize
(
"server_args"
,
[
pytest
.
param
([],
id
=
"default-frontend-multiprocessing"
),
pytest
.
param
(
[
"--disable-frontend-multiprocessing"
],
id
=
"disable-frontend-multiprocessing"
,
),
],
indirect
=
True
,
)
@
pytest
.
mark
.
asyncio
async
def
test_check_health
(
server
:
RemoteOpenAIServer
):
response
=
requests
.
get
(
server
.
url_for
(
"health"
))
...
...
@@ -119,13 +97,7 @@ async def test_check_health(server: RemoteOpenAIServer):
@
pytest
.
mark
.
parametrize
(
"server_args"
,
[
pytest
.
param
(
[
"--max-model-len"
,
"10100"
],
id
=
"default-frontend-multiprocessing"
),
pytest
.
param
(
[
"--disable-frontend-multiprocessing"
,
"--max-model-len"
,
"10100"
],
id
=
"disable-frontend-multiprocessing"
,
),
pytest
.
param
([
"--max-model-len"
,
"10100"
]),
],
indirect
=
True
,
)
...
...
tests/entrypoints/instrumentator/test_metrics.py
View file @
9040151f
...
...
@@ -50,7 +50,6 @@ def default_server_args():
params
=
[
""
,
"--enable-chunked-prefill"
,
"--disable-frontend-multiprocessing"
,
f
"--show-hidden-metrics-for-version=
{
PREV_MINOR_VERSION
}
"
,
],
)
...
...
tests/entrypoints/openai/completion/test_completion_with_prompt_embeds.py
View file @
9040151f
...
...
@@ -83,11 +83,8 @@ def example_prompt_embeds(hf_runner):
return
[
_encode_embeds
(
item
)
for
item
in
example_embeddings
]
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
[
""
,
"--disable-frontend-multiprocessing"
])
def
server_with_prompt_embeds
(
default_server_args
,
request
):
if
request
.
param
:
default_server_args
.
append
(
request
.
param
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
server_with_prompt_embeds
(
default_server_args
):
with
RemoteOpenAIServer
(
MODEL_NAME
,
default_server_args
)
as
remote_server
:
yield
remote_server
...
...
tests/entrypoints/openai/completion/test_shutdown.py
View file @
9040151f
...
...
@@ -150,7 +150,6 @@ async def test_shutdown_on_engine_failure():
"0.05"
,
"--max-num-seqs"
,
"2"
,
"--disable-frontend-multiprocessing"
,
],
# ROCm: Disable stdout/stderr pipe capture. Subprocess hangs when
# stdout/stderr pipes are enabled during ROCm GPU initialization.
...
...
tests/v1/entrypoints/openai/test_completion.py
View file @
9040151f
...
...
@@ -26,19 +26,12 @@ def default_server_args():
"128"
,
"--enforce-eager"
,
"--enable-prompt-tokens-details"
,
"--no-enable-prefix-caching"
,
]
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
[
[
"--no-enable-prefix-caching"
],
[
"--no-enable-prefix-caching"
,
"--disable-frontend-multiprocessing"
],
],
)
def
server
(
default_server_args
,
request
):
if
request
.
param
:
default_server_args
=
default_server_args
+
request
.
param
@
pytest
.
fixture
(
scope
=
"module"
)
def
server
(
default_server_args
):
with
RemoteOpenAIServer
(
MODEL_NAME
,
default_server_args
)
as
remote_server
:
yield
remote_server
...
...
vllm/benchmarks/throughput.py
View file @
9040151f
...
...
@@ -181,7 +181,6 @@ async def run_vllm_async(
n
:
int
,
engine_args
:
AsyncEngineArgs
,
do_profile
:
bool
,
disable_frontend_multiprocessing
:
bool
=
False
,
disable_detokenize
:
bool
=
False
,
)
->
float
:
from
vllm
import
SamplingParams
...
...
@@ -191,7 +190,6 @@ async def run_vllm_async(
async
with
build_async_engine_client_from_engine_args
(
engine_args
,
disable_frontend_multiprocessing
=
disable_frontend_multiprocessing
,
)
as
llm
:
model_config
=
llm
.
model_config
assert
all
(
...
...
@@ -757,12 +755,6 @@ def add_cli_args(parser: argparse.ArgumentParser):
default
=
False
,
help
=
"Use vLLM async engine rather than LLM class."
,
)
parser
.
add_argument
(
"--disable-frontend-multiprocessing"
,
action
=
"store_true"
,
default
=
False
,
help
=
"Disable decoupled async engine frontend."
,
)
parser
.
add_argument
(
"--disable-detokenize"
,
action
=
"store_true"
,
...
...
@@ -880,7 +872,6 @@ def main(args: argparse.Namespace):
requests
,
args
.
n
,
AsyncEngineArgs
.
from_cli_args
(
args
),
disable_frontend_multiprocessing
=
args
.
disable_frontend_multiprocessing
,
disable_detokenize
=
args
.
disable_detokenize
,
do_profile
=
args
.
profile
,
)
...
...
vllm/entrypoints/openai/api_server.py
View file @
9040151f
...
...
@@ -79,7 +79,6 @@ async def build_async_engine_client(
args
:
Namespace
,
*
,
usage_context
:
UsageContext
=
UsageContext
.
OPENAI_API_SERVER
,
disable_frontend_multiprocessing
:
bool
|
None
=
None
,
client_config
:
dict
[
str
,
Any
]
|
None
=
None
,
)
->
AsyncIterator
[
EngineClient
]:
if
os
.
getenv
(
"VLLM_WORKER_MULTIPROC_METHOD"
)
==
"forkserver"
:
...
...
@@ -98,13 +97,9 @@ async def build_async_engine_client(
engine_args
.
_api_process_count
=
client_config
.
get
(
"client_count"
,
1
)
engine_args
.
_api_process_rank
=
client_config
.
get
(
"client_index"
,
0
)
if
disable_frontend_multiprocessing
is
None
:
disable_frontend_multiprocessing
=
bool
(
args
.
disable_frontend_multiprocessing
)
async
with
build_async_engine_client_from_engine_args
(
engine_args
,
usage_context
=
usage_context
,
disable_frontend_multiprocessing
=
disable_frontend_multiprocessing
,
client_config
=
client_config
,
)
as
engine
:
yield
engine
...
...
@@ -115,7 +110,6 @@ async def build_async_engine_client_from_engine_args(
engine_args
:
AsyncEngineArgs
,
*
,
usage_context
:
UsageContext
=
UsageContext
.
OPENAI_API_SERVER
,
disable_frontend_multiprocessing
:
bool
=
False
,
client_config
:
dict
[
str
,
Any
]
|
None
=
None
,
)
->
AsyncIterator
[
EngineClient
]:
"""
...
...
@@ -129,9 +123,6 @@ async def build_async_engine_client_from_engine_args(
# Create the EngineConfig (determines if we can use V1).
vllm_config
=
engine_args
.
create_engine_config
(
usage_context
=
usage_context
)
if
disable_frontend_multiprocessing
:
logger
.
warning
(
"V1 is enabled, but got --disable-frontend-multiprocessing."
)
from
vllm.v1.engine.async_llm
import
AsyncLLM
async_llm
:
AsyncLLM
|
None
=
None
...
...
vllm/entrypoints/openai/cli_args.py
View file @
9040151f
...
...
@@ -105,9 +105,6 @@ class BaseFrontendArgs:
"""When `--max-logprobs` is specified, represents single tokens as
strings of the form 'token_id:{token_id}' so that tokens that are not
JSON-encodable can be identified."""
disable_frontend_multiprocessing
:
bool
=
False
"""If specified, will run the OpenAI frontend server in the same process as
the model serving engine."""
enable_auto_tool_choice
:
bool
=
False
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
to specify which parser to use."""
...
...
vllm/entrypoints/openai/run_batch.py
View file @
9040151f
...
...
@@ -823,7 +823,6 @@ async def main(args: Namespace):
async
with
build_async_engine_client
(
args
,
usage_context
=
UsageContext
.
OPENAI_BATCH_RUNNER
,
disable_frontend_multiprocessing
=
False
,
)
as
engine_client
:
await
run_batch
(
engine_client
,
args
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment