Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6da00785
Unverified
Commit
6da00785
authored
Jul 24, 2025
by
Chauncey
Committed by
GitHub
Jul 24, 2025
Browse files
[Feat] Allow custom naming of vLLM processes (#21445)
Signed-off-by:
chaunceyjiang
<
chaunceyjiang@gmail.com
>
parent
73e3949d
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
46 additions
and
17 deletions
+46
-17
requirements/common.txt
requirements/common.txt
+1
-0
requirements/docs.txt
requirements/docs.txt
+1
-0
vllm/entrypoints/cli/serve.py
vllm/entrypoints/cli/serve.py
+2
-2
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+4
-3
vllm/envs.py
vllm/envs.py
+6
-0
vllm/utils/__init__.py
vllm/utils/__init__.py
+14
-0
vllm/v1/engine/coordinator.py
vllm/v1/engine/coordinator.py
+6
-5
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+3
-1
vllm/v1/executor/multiproc_executor.py
vllm/v1/executor/multiproc_executor.py
+6
-3
vllm/v1/utils.py
vllm/v1/utils.py
+3
-3
No files found.
requirements/common.txt
View file @
6da00785
...
@@ -48,3 +48,4 @@ scipy # Required for phi-4-multimodal-instruct
...
@@ -48,3 +48,4 @@ scipy # Required for phi-4-multimodal-instruct
ninja # Required for xgrammar, rocm, tpu, xpu
ninja # Required for xgrammar, rocm, tpu, xpu
pybase64 # fast base64 implementation
pybase64 # fast base64 implementation
cbor2 # Required for cross-language serialization of hashable objects
cbor2 # Required for cross-language serialization of hashable objects
setproctitle # Used to set process names for better debugging and monitoring
requirements/docs.txt
View file @
6da00785
...
@@ -22,6 +22,7 @@ pillow
...
@@ -22,6 +22,7 @@ pillow
psutil
psutil
pybase64
pybase64
pydantic
pydantic
setproctitle
torch
torch
transformers
transformers
zmq
zmq
vllm/entrypoints/cli/serve.py
View file @
6da00785
...
@@ -21,7 +21,7 @@ from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
...
@@ -21,7 +21,7 @@ from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
from
vllm.executor.multiproc_worker_utils
import
_add_prefix
from
vllm.executor.multiproc_worker_utils
import
_add_prefix
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
FlexibleArgumentParser
,
get_tcp_uri
from
vllm.utils
import
FlexibleArgumentParser
,
bind_process_name
,
get_tcp_uri
from
vllm.v1.engine.core
import
EngineCoreProc
from
vllm.v1.engine.core
import
EngineCoreProc
from
vllm.v1.engine.utils
import
CoreEngineProcManager
,
launch_core_engines
from
vllm.v1.engine.utils
import
CoreEngineProcManager
,
launch_core_engines
from
vllm.v1.executor.abstract
import
Executor
from
vllm.v1.executor.abstract
import
Executor
...
@@ -77,7 +77,7 @@ def run_headless(args: argparse.Namespace):
...
@@ -77,7 +77,7 @@ def run_headless(args: argparse.Namespace):
if
args
.
api_server_count
>
1
:
if
args
.
api_server_count
>
1
:
raise
ValueError
(
"api_server_count can't be set in headless mode"
)
raise
ValueError
(
"api_server_count can't be set in headless mode"
)
bind_process_name
(
"APIServer_Headless"
)
# Create the EngineConfig.
# Create the EngineConfig.
engine_args
=
vllm
.
AsyncEngineArgs
.
from_cli_args
(
args
)
engine_args
=
vllm
.
AsyncEngineArgs
.
from_cli_args
(
args
)
usage_context
=
UsageContext
.
OPENAI_API_SERVER
usage_context
=
UsageContext
.
OPENAI_API_SERVER
...
...
vllm/entrypoints/openai/api_server.py
View file @
6da00785
...
@@ -101,8 +101,9 @@ from vllm.transformers_utils.config import (
...
@@ -101,8 +101,9 @@ from vllm.transformers_utils.config import (
maybe_register_config_serialize_by_value
)
maybe_register_config_serialize_by_value
)
from
vllm.transformers_utils.tokenizer
import
MistralTokenizer
from
vllm.transformers_utils.tokenizer
import
MistralTokenizer
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
(
Device
,
FlexibleArgumentParser
,
get_open_zmq_ipc_path
,
from
vllm.utils
import
(
Device
,
FlexibleArgumentParser
,
bind_process_name
,
is_valid_ipv6_address
,
set_ulimit
)
get_open_zmq_ipc_path
,
is_valid_ipv6_address
,
set_ulimit
)
from
vllm.v1.metrics.prometheus
import
get_prometheus_registry
from
vllm.v1.metrics.prometheus
import
get_prometheus_registry
from
vllm.version
import
__version__
as
VLLM_VERSION
from
vllm.version
import
__version__
as
VLLM_VERSION
...
@@ -1804,7 +1805,7 @@ async def run_server_worker(listen_address,
...
@@ -1804,7 +1805,7 @@ async def run_server_worker(listen_address,
ToolParserManager
.
import_tool_parser
(
args
.
tool_parser_plugin
)
ToolParserManager
.
import_tool_parser
(
args
.
tool_parser_plugin
)
server_index
=
client_config
.
get
(
"client_index"
,
0
)
if
client_config
else
0
server_index
=
client_config
.
get
(
"client_index"
,
0
)
if
client_config
else
0
bind_process_name
(
"APIServer"
,
str
(
server_index
))
# Load logging config for uvicorn if specified
# Load logging config for uvicorn if specified
log_config
=
load_log_config
(
args
.
log_config_file
)
log_config
=
load_log_config
(
args
.
log_config_file
)
if
log_config
is
not
None
:
if
log_config
is
not
None
:
...
...
vllm/envs.py
View file @
6da00785
...
@@ -985,6 +985,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -985,6 +985,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Used to force set up loopback IP
# Used to force set up loopback IP
"VLLM_LOOPBACK_IP"
:
"VLLM_LOOPBACK_IP"
:
lambda
:
os
.
getenv
(
"VLLM_LOOPBACK_IP"
,
""
),
lambda
:
os
.
getenv
(
"VLLM_LOOPBACK_IP"
,
""
),
# Used to set the process name prefix for vLLM processes.
# This is useful for debugging and monitoring purposes.
# The default value is "VLLM".
"VLLM_PROCESS_NAME_PREFIX"
:
lambda
:
os
.
getenv
(
"VLLM_PROCESS_NAME_PREFIX"
,
"VLLM"
),
}
}
# --8<-- [end:env-vars-definition]
# --8<-- [end:env-vars-definition]
...
...
vllm/utils/__init__.py
View file @
6da00785
...
@@ -58,6 +58,7 @@ import numpy as np
...
@@ -58,6 +58,7 @@ import numpy as np
import
numpy.typing
as
npt
import
numpy.typing
as
npt
import
psutil
import
psutil
import
regex
as
re
import
regex
as
re
import
setproctitle
import
torch
import
torch
import
torch.types
import
torch.types
import
yaml
import
yaml
...
@@ -3278,3 +3279,16 @@ def has_deep_gemm() -> bool:
...
@@ -3278,3 +3279,16 @@ def has_deep_gemm() -> bool:
"""Whether the optional `deep_gemm` package is available."""
"""Whether the optional `deep_gemm` package is available."""
return
_has_module
(
"deep_gemm"
)
return
_has_module
(
"deep_gemm"
)
def
bind_process_name
(
name
:
str
,
suffix
:
str
=
""
)
->
None
:
"""Bind the process name to a specific name with an optional suffix.
Args:
name: The base name to bind the process to.
suffix: An optional suffix to append to the base name.
"""
name
=
f
"
{
envs
.
VLLM_PROCESS_NAME_PREFIX
}
::
{
name
}
"
if
suffix
:
name
=
f
"
{
name
}
_
{
suffix
}
"
setproctitle
.
setproctitle
(
name
)
vllm/v1/engine/coordinator.py
View file @
6da00785
...
@@ -13,7 +13,8 @@ from vllm.logger import init_logger
...
@@ -13,7 +13,8 @@ from vllm.logger import init_logger
from
vllm.utils
import
get_mp_context
,
make_zmq_socket
from
vllm.utils
import
get_mp_context
,
make_zmq_socket
from
vllm.v1.engine
import
EngineCoreOutputs
,
EngineCoreRequestType
from
vllm.v1.engine
import
EngineCoreOutputs
,
EngineCoreRequestType
from
vllm.v1.serial_utils
import
MsgpackDecoder
from
vllm.v1.serial_utils
import
MsgpackDecoder
from
vllm.v1.utils
import
get_engine_client_zmq_addr
,
shutdown
from
vllm.v1.utils
import
(
bind_process_name
,
get_engine_client_zmq_addr
,
shutdown
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -79,7 +80,7 @@ class DPCoordinator:
...
@@ -79,7 +80,7 @@ class DPCoordinator:
context
=
get_mp_context
()
context
=
get_mp_context
()
self
.
proc
:
multiprocessing
.
Process
=
context
.
Process
(
self
.
proc
:
multiprocessing
.
Process
=
context
.
Process
(
target
=
CoordinatorProc
.
run_coordinator
,
target
=
DP
CoordinatorProc
.
run_coordinator
,
name
=
"VLLM_DP_Coordinator"
,
name
=
"VLLM_DP_Coordinator"
,
kwargs
=
{
kwargs
=
{
"engine_count"
:
parallel_config
.
data_parallel_size
,
"engine_count"
:
parallel_config
.
data_parallel_size
,
...
@@ -113,12 +114,12 @@ class EngineState:
...
@@ -113,12 +114,12 @@ class EngineState:
self
.
request_counts
=
[
0
,
0
]
# [waiting, running]
self
.
request_counts
=
[
0
,
0
]
# [waiting, running]
class
CoordinatorProc
:
class
DP
CoordinatorProc
:
def
__init__
(
self
,
def
__init__
(
self
,
engine_count
:
int
,
engine_count
:
int
,
min_stats_update_interval_ms
:
int
=
100
):
min_stats_update_interval_ms
:
int
=
100
):
bind_process_name
(
self
.
__class__
.
__name__
)
self
.
ctx
=
zmq
.
Context
()
self
.
ctx
=
zmq
.
Context
()
self
.
engines
=
[
EngineState
()
for
_
in
range
(
engine_count
)]
self
.
engines
=
[
EngineState
()
for
_
in
range
(
engine_count
)]
...
@@ -137,7 +138,7 @@ class CoordinatorProc:
...
@@ -137,7 +138,7 @@ class CoordinatorProc:
back_publish_address
:
str
,
back_publish_address
:
str
,
min_stats_update_interval_ms
:
int
=
100
,
min_stats_update_interval_ms
:
int
=
100
,
):
):
coordinator
=
CoordinatorProc
(
coordinator
=
DP
CoordinatorProc
(
engine_count
=
engine_count
,
engine_count
=
engine_count
,
min_stats_update_interval_ms
=
min_stats_update_interval_ms
)
min_stats_update_interval_ms
=
min_stats_update_interval_ms
)
try
:
try
:
...
...
vllm/v1/engine/core.py
View file @
6da00785
...
@@ -25,7 +25,8 @@ from vllm.logging_utils.dump_input import dump_engine_exception
...
@@ -25,7 +25,8 @@ from vllm.logging_utils.dump_input import dump_engine_exception
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.transformers_utils.config
import
(
from
vllm.transformers_utils.config
import
(
maybe_register_config_serialize_by_value
)
maybe_register_config_serialize_by_value
)
from
vllm.utils
import
make_zmq_socket
,
resolve_obj_by_qualname
from
vllm.utils
import
(
bind_process_name
,
make_zmq_socket
,
resolve_obj_by_qualname
)
from
vllm.v1.core.kv_cache_utils
import
(
get_kv_cache_config
,
from
vllm.v1.core.kv_cache_utils
import
(
get_kv_cache_config
,
unify_kv_cache_configs
)
unify_kv_cache_configs
)
from
vllm.v1.core.sched.interface
import
SchedulerInterface
from
vllm.v1.core.sched.interface
import
SchedulerInterface
...
@@ -411,6 +412,7 @@ class EngineCoreProc(EngineCore):
...
@@ -411,6 +412,7 @@ class EngineCoreProc(EngineCore):
client_handshake_address
:
Optional
[
str
]
=
None
,
client_handshake_address
:
Optional
[
str
]
=
None
,
engine_index
:
int
=
0
,
engine_index
:
int
=
0
,
):
):
bind_process_name
(
self
.
__class__
.
__name__
,
f
"
{
engine_index
}
"
)
self
.
input_queue
=
queue
.
Queue
[
tuple
[
EngineCoreRequestType
,
Any
]]()
self
.
input_queue
=
queue
.
Queue
[
tuple
[
EngineCoreRequestType
,
Any
]]()
self
.
output_queue
=
queue
.
Queue
[
Union
[
tuple
[
int
,
EngineCoreOutputs
],
self
.
output_queue
=
queue
.
Queue
[
Union
[
tuple
[
int
,
EngineCoreOutputs
],
bytes
]]()
bytes
]]()
...
...
vllm/v1/executor/multiproc_executor.py
View file @
6da00785
...
@@ -30,8 +30,8 @@ from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
...
@@ -30,8 +30,8 @@ from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
from
vllm.executor.multiproc_worker_utils
import
(
from
vllm.executor.multiproc_worker_utils
import
(
_add_prefix
,
set_multiprocessing_worker_envs
)
_add_prefix
,
set_multiprocessing_worker_envs
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils
import
(
get_distributed_init_method
,
get_loopback_ip
,
from
vllm.utils
import
(
bind_process_name
,
get_distributed_init_method
,
get_mp_context
,
get_open_port
)
get_loopback_ip
,
get_mp_context
,
get_open_port
)
from
vllm.v1.executor.abstract
import
Executor
,
FailureCallback
from
vllm.v1.executor.abstract
import
Executor
,
FailureCallback
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.worker.worker_base
import
WorkerWrapperBase
from
vllm.worker.worker_base
import
WorkerWrapperBase
...
@@ -365,7 +365,10 @@ class WorkerProc:
...
@@ -365,7 +365,10 @@ class WorkerProc:
}
}
wrapper
.
init_worker
(
all_kwargs
)
wrapper
.
init_worker
(
all_kwargs
)
self
.
worker
=
wrapper
self
.
worker
=
wrapper
bind_process_name
(
self
.
worker
.
worker
.
__class__
.
__name__
,
f
"TP
{
self
.
rank
}
_DP
{
vllm_config
.
parallel_config
.
data_parallel_rank
}
"
)
pid
=
os
.
getpid
()
pid
=
os
.
getpid
()
_add_prefix
(
sys
.
stdout
,
f
"VllmWorker rank=
{
rank
}
"
,
pid
)
_add_prefix
(
sys
.
stdout
,
f
"VllmWorker rank=
{
rank
}
"
,
pid
)
_add_prefix
(
sys
.
stderr
,
f
"VllmWorker rank=
{
rank
}
"
,
pid
)
_add_prefix
(
sys
.
stderr
,
f
"VllmWorker rank=
{
rank
}
"
,
pid
)
...
...
vllm/v1/utils.py
View file @
6da00785
...
@@ -15,8 +15,8 @@ import torch
...
@@ -15,8 +15,8 @@ import torch
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
(
UsageContext
,
is_usage_stats_enabled
,
from
vllm.usage.usage_lib
import
(
UsageContext
,
is_usage_stats_enabled
,
usage_message
)
usage_message
)
from
vllm.utils
import
(
get_open_port
,
get_open_zmq_ipc_path
,
get_tcp_uri
,
from
vllm.utils
import
(
bind_process_name
,
get_open_port
,
kill_process_tree
)
get_open_zmq_ipc_path
,
get_tcp_uri
,
kill_process_tree
)
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.v1.engine.coordinator
import
DPCoordinator
from
vllm.v1.engine.coordinator
import
DPCoordinator
...
@@ -144,7 +144,7 @@ class APIServerProcessManager:
...
@@ -144,7 +144,7 @@ class APIServerProcessManager:
self
.
listen_address
=
listen_address
self
.
listen_address
=
listen_address
self
.
sock
=
sock
self
.
sock
=
sock
self
.
args
=
args
self
.
args
=
args
bind_process_name
(
self
.
__class__
.
__name__
)
# Start API servers
# Start API servers
spawn_context
=
multiprocessing
.
get_context
(
"spawn"
)
spawn_context
=
multiprocessing
.
get_context
(
"spawn"
)
self
.
processes
:
list
[
BaseProcess
]
=
[]
self
.
processes
:
list
[
BaseProcess
]
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment