Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e6680f9e
Unverified
Commit
e6680f9e
authored
Aug 01, 2025
by
wuhang
Committed by
GitHub
Aug 01, 2025
Browse files
[Bugfix] Add log prefix in non-dp mode engine core (#21889)
Signed-off-by:
wuhang
<
wuhang6@huawei.com
>
parent
27a145e8
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
75 additions
and
81 deletions
+75
-81
vllm/entrypoints/cli/serve.py
vllm/entrypoints/cli/serve.py
+2
-9
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+4
-8
vllm/executor/multiproc_worker_utils.py
vllm/executor/multiproc_worker_utils.py
+4
-38
vllm/utils/__init__.py
vllm/utils/__init__.py
+54
-1
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+4
-18
vllm/v1/executor/multiproc_executor.py
vllm/v1/executor/multiproc_executor.py
+7
-7
No files found.
vllm/entrypoints/cli/serve.py
View file @
e6680f9e
...
...
@@ -2,9 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
argparse
import
os
import
signal
import
sys
from
typing
import
Optional
import
uvloop
...
...
@@ -18,10 +16,9 @@ from vllm.entrypoints.openai.cli_args import (make_arg_parser,
validate_parsed_serve_args
)
from
vllm.entrypoints.utils
import
(
VLLM_SUBCMD_PARSER_EPILOG
,
show_filtered_argument_or_group_from_help
)
from
vllm.executor.multiproc_worker_utils
import
_add_prefix
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
FlexibleArgumentParser
,
get_tcp_uri
from
vllm.utils
import
FlexibleArgumentParser
,
decorate_logs
,
get_tcp_uri
from
vllm.v1.engine.core
import
EngineCoreProc
from
vllm.v1.engine.utils
import
CoreEngineProcManager
,
launch_core_engines
from
vllm.v1.executor.abstract
import
Executor
...
...
@@ -229,11 +226,7 @@ def run_api_server_worker_proc(listen_address,
"""Entrypoint for individual API server worker processes."""
# Add process-specific prefix to stdout and stderr.
from
multiprocessing
import
current_process
process_name
=
current_process
().
name
pid
=
os
.
getpid
()
_add_prefix
(
sys
.
stdout
,
process_name
,
pid
)
_add_prefix
(
sys
.
stderr
,
process_name
,
pid
)
decorate_logs
()
uvloop
.
run
(
run_server_worker
(
listen_address
,
sock
,
args
,
client_config
,
...
...
vllm/entrypoints/openai/api_server.py
View file @
e6680f9e
...
...
@@ -11,7 +11,6 @@ import multiprocessing
import
os
import
signal
import
socket
import
sys
import
tempfile
import
uuid
from
argparse
import
Namespace
...
...
@@ -95,15 +94,15 @@ from vllm.entrypoints.openai.serving_transcription import (
from
vllm.entrypoints.openai.tool_parsers
import
ToolParserManager
from
vllm.entrypoints.utils
import
(
cli_env_setup
,
load_aware_call
,
log_non_default_args
,
with_cancellation
)
from
vllm.executor.multiproc_worker_utils
import
_add_prefix
from
vllm.logger
import
init_logger
from
vllm.reasoning
import
ReasoningParserManager
from
vllm.transformers_utils.config
import
(
maybe_register_config_serialize_by_value
)
from
vllm.transformers_utils.tokenizer
import
MistralTokenizer
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
(
Device
,
FlexibleArgumentParser
,
get_open_zmq_ipc_path
,
is_valid_ipv6_address
,
set_process_title
,
set_ulimit
)
from
vllm.utils
import
(
Device
,
FlexibleArgumentParser
,
decorate_logs
,
get_open_zmq_ipc_path
,
is_valid_ipv6_address
,
set_process_title
,
set_ulimit
)
from
vllm.v1.metrics.prometheus
import
get_prometheus_registry
from
vllm.version
import
__version__
as
VLLM_VERSION
...
...
@@ -1808,10 +1807,7 @@ async def run_server(args, **uvicorn_kwargs) -> None:
"""Run a single-worker API server."""
# Add process-specific prefix to stdout and stderr.
process_name
=
"APIServer"
pid
=
os
.
getpid
()
_add_prefix
(
sys
.
stdout
,
process_name
,
pid
)
_add_prefix
(
sys
.
stderr
,
process_name
,
pid
)
decorate_logs
(
"APIServer"
)
listen_address
,
sock
=
setup_server
(
args
)
await
run_server_worker
(
listen_address
,
sock
,
args
,
**
uvicorn_kwargs
)
...
...
vllm/executor/multiproc_worker_utils.py
View file @
e6680f9e
...
...
@@ -3,21 +3,20 @@
import
asyncio
import
os
import
sys
import
threading
import
uuid
from
dataclasses
import
dataclass
from
multiprocessing
import
Queue
from
multiprocessing.connection
import
wait
from
multiprocessing.process
import
BaseProcess
from
typing
import
(
Any
,
Callable
,
Dict
,
Generic
,
List
,
Optional
,
TextIO
,
TypeVar
,
Union
)
from
typing
import
Any
,
Callable
,
Dict
,
Generic
,
List
,
Optional
,
TypeVar
,
Union
import
torch
from
vllm.config
import
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.utils
import
_maybe_force_spawn
,
get_mp_context
,
run_method
from
vllm.utils
import
(
_maybe_force_spawn
,
decorate_logs
,
get_mp_context
,
run_method
)
logger
=
init_logger
(
__name__
)
...
...
@@ -25,10 +24,6 @@ T = TypeVar('T')
_TERMINATE
=
"TERMINATE"
# sentinel
# ANSI color codes
CYAN
=
'
\033
[1;36m'
RESET
=
'
\033
[0;0m'
JOIN_TIMEOUT_S
=
2
...
...
@@ -213,9 +208,7 @@ def _run_worker_process(
# Add process-specific prefix to stdout and stderr
process_name
=
get_mp_context
().
current_process
().
name
pid
=
os
.
getpid
()
_add_prefix
(
sys
.
stdout
,
process_name
,
pid
)
_add_prefix
(
sys
.
stderr
,
process_name
,
pid
)
decorate_logs
(
process_name
)
# Initialize worker
worker
=
worker_factory
(
vllm_config
,
rank
)
...
...
@@ -260,33 +253,6 @@ def _run_worker_process(
logger
.
info
(
"Worker exiting"
)
def
_add_prefix
(
file
:
TextIO
,
worker_name
:
str
,
pid
:
int
)
->
None
:
"""Prepend each output line with process-specific prefix"""
prefix
=
f
"
{
CYAN
}
(
{
worker_name
}
pid=
{
pid
}
)
{
RESET
}
"
file_write
=
file
.
write
def
write_with_prefix
(
s
:
str
):
if
not
s
:
return
if
file
.
start_new_line
:
# type: ignore[attr-defined]
file_write
(
prefix
)
idx
=
0
while
(
next_idx
:
=
s
.
find
(
'
\n
'
,
idx
))
!=
-
1
:
next_idx
+=
1
file_write
(
s
[
idx
:
next_idx
])
if
next_idx
==
len
(
s
):
file
.
start_new_line
=
True
# type: ignore[attr-defined]
return
file_write
(
prefix
)
idx
=
next_idx
file_write
(
s
[
idx
:])
file
.
start_new_line
=
False
# type: ignore[attr-defined]
file
.
start_new_line
=
True
# type: ignore[attr-defined]
file
.
write
=
write_with_prefix
# type: ignore[method-assign]
def
set_multiprocessing_worker_envs
(
parallel_config
):
""" Set up environment variables that should be used when there are workers
in a multiprocessing environment. This should be called by the parent
...
...
vllm/utils/__init__.py
View file @
e6680f9e
...
...
@@ -47,7 +47,7 @@ from dataclasses import dataclass, field
from
functools
import
cache
,
lru_cache
,
partial
,
wraps
from
types
import
MappingProxyType
from
typing
import
(
TYPE_CHECKING
,
Any
,
Callable
,
Generic
,
Literal
,
NamedTuple
,
Optional
,
Tuple
,
TypeVar
,
Union
,
cast
,
overload
)
Optional
,
TextIO
,
Tuple
,
TypeVar
,
Union
,
cast
,
overload
)
from
urllib.parse
import
urlparse
from
uuid
import
uuid4
...
...
@@ -167,6 +167,10 @@ GB_bytes = 1_000_000_000
GiB_bytes
=
1
<<
30
"""The number of bytes in one gibibyte (GiB)."""
# ANSI color codes
CYAN
=
'
\033
[1;36m'
RESET
=
'
\033
[0;0m'
STR_DTYPE_TO_TORCH_DTYPE
=
{
"half"
:
torch
.
half
,
"bfloat16"
:
torch
.
bfloat16
,
...
...
@@ -3258,3 +3262,52 @@ def set_process_title(name: str,
else
:
name
=
f
"
{
envs
.
VLLM_PROCESS_NAME_PREFIX
}
::
{
name
}
"
setproctitle
.
setproctitle
(
name
)
def
_add_prefix
(
file
:
TextIO
,
worker_name
:
str
,
pid
:
int
)
->
None
:
"""Prepend each output line with process-specific prefix"""
prefix
=
f
"
{
CYAN
}
(
{
worker_name
}
pid=
{
pid
}
)
{
RESET
}
"
file_write
=
file
.
write
def
write_with_prefix
(
s
:
str
):
if
not
s
:
return
if
file
.
start_new_line
:
# type: ignore[attr-defined]
file_write
(
prefix
)
idx
=
0
while
(
next_idx
:
=
s
.
find
(
'
\n
'
,
idx
))
!=
-
1
:
next_idx
+=
1
file_write
(
s
[
idx
:
next_idx
])
if
next_idx
==
len
(
s
):
file
.
start_new_line
=
True
# type: ignore[attr-defined]
return
file_write
(
prefix
)
idx
=
next_idx
file_write
(
s
[
idx
:])
file
.
start_new_line
=
False
# type: ignore[attr-defined]
file
.
start_new_line
=
True
# type: ignore[attr-defined]
file
.
write
=
write_with_prefix
# type: ignore[method-assign]
def
decorate_logs
(
process_name
:
Optional
[
str
]
=
None
)
->
None
:
"""
Adds a process-specific prefix to each line of output written to stdout and
stderr.
This function is intended to be called before initializing the api_server,
engine_core, or worker classes, so that all subsequent output from the
process is prefixed with the process name and PID. This helps distinguish
log output from different processes in multi-process environments.
Args:
process_name: Optional; the name of the process to use in the prefix.
If not provided, the current process name from the multiprocessing
context is used.
"""
if
process_name
is
None
:
process_name
=
get_mp_context
().
current_process
().
name
pid
=
os
.
getpid
()
_add_prefix
(
sys
.
stdout
,
process_name
,
pid
)
_add_prefix
(
sys
.
stderr
,
process_name
,
pid
)
vllm/v1/engine/core.py
View file @
e6680f9e
...
...
@@ -3,7 +3,6 @@
import
os
import
queue
import
signal
import
sys
import
threading
import
time
from
collections
import
deque
...
...
@@ -19,15 +18,14 @@ import zmq
from
vllm.config
import
ParallelConfig
,
VllmConfig
from
vllm.distributed
import
stateless_destroy_torch_distributed_process_group
from
vllm.executor.multiproc_worker_utils
import
_add_prefix
from
vllm.logger
import
init_logger
from
vllm.logging_utils.dump_input
import
dump_engine_exception
from
vllm.lora.request
import
LoRARequest
from
vllm.tasks
import
POOLING_TASKS
,
SupportedTask
from
vllm.transformers_utils.config
import
(
maybe_register_config_serialize_by_value
)
from
vllm.utils
import
(
make_zmq_socket
,
resolve_obj_by_qualname
,
set_process_title
)
from
vllm.utils
import
(
decorate_logs
,
make_zmq_socket
,
resolve_obj_by_qualname
,
set_process_title
)
from
vllm.v1.core.kv_cache_utils
import
(
get_kv_cache_config
,
unify_kv_cache_configs
)
from
vllm.v1.core.sched.interface
import
SchedulerInterface
...
...
@@ -649,12 +647,14 @@ class EngineCoreProc(EngineCore):
"vllm_config"
].
parallel_config
if
parallel_config
.
data_parallel_size
>
1
or
dp_rank
>
0
:
set_process_title
(
"DPEngineCore"
,
str
(
dp_rank
))
decorate_logs
()
# Set data parallel rank for this engine process.
parallel_config
.
data_parallel_rank
=
dp_rank
parallel_config
.
data_parallel_rank_local
=
local_dp_rank
engine_core
=
DPEngineCoreProc
(
*
args
,
**
kwargs
)
else
:
set_process_title
(
"EngineCore"
)
decorate_logs
()
engine_core
=
EngineCoreProc
(
*
args
,
**
kwargs
)
engine_core
.
run_busy_loop
()
...
...
@@ -905,8 +905,6 @@ class DPEngineCoreProc(EngineCoreProc):
log_stats
:
bool
,
client_handshake_address
:
Optional
[
str
]
=
None
,
):
self
.
_decorate_logs
()
# Counts forward-passes of the model so that we can synchronize
# finished with DP peers every N steps.
self
.
counter
=
0
...
...
@@ -919,15 +917,6 @@ class DPEngineCoreProc(EngineCoreProc):
executor_class
,
log_stats
,
client_handshake_address
,
dp_rank
)
def
_decorate_logs
(
self
):
# Add process-specific prefix to stdout and stderr before
# we initialize the engine.
from
multiprocessing
import
current_process
process_name
=
current_process
().
name
pid
=
os
.
getpid
()
_add_prefix
(
sys
.
stdout
,
process_name
,
pid
)
_add_prefix
(
sys
.
stderr
,
process_name
,
pid
)
def
_init_data_parallel
(
self
,
vllm_config
:
VllmConfig
):
# Configure GPUs and stateless process group for data parallel.
...
...
@@ -1149,9 +1138,6 @@ class DPEngineCoreActor(DPEngineCoreProc):
f
"
{
(
local_dp_rank
+
1
)
*
world_size
}
) "
f
"base value:
\"
{
os
.
getenv
(
device_control_env_var
)
}
\"
"
)
from
e
def
_decorate_logs
(
self
):
pass
@
contextmanager
def
_perform_handshakes
(
self
,
handshake_address
:
str
,
identity
:
bytes
,
local_client
:
bool
,
vllm_config
:
VllmConfig
,
...
...
vllm/v1/executor/multiproc_executor.py
View file @
e6680f9e
...
...
@@ -4,7 +4,6 @@ import multiprocessing
import
os
import
pickle
import
signal
import
sys
import
threading
import
time
import
traceback
...
...
@@ -28,10 +27,11 @@ from vllm.distributed.device_communicators.shm_broadcast import (Handle,
MessageQueue
)
from
vllm.distributed.kv_transfer.kv_connector.utils
import
KVOutputAggregator
from
vllm.executor.multiproc_worker_utils
import
(
_add_prefix
,
set_multiprocessing_worker_envs
)
set_multiprocessing_worker_envs
)
from
vllm.logger
import
init_logger
from
vllm.utils
import
(
get_distributed_init_method
,
get_loopback_ip
,
get_mp_context
,
get_open_port
,
set_process_title
)
from
vllm.utils
import
(
decorate_logs
,
get_distributed_init_method
,
get_loopback_ip
,
get_mp_context
,
get_open_port
,
set_process_title
)
from
vllm.v1.executor.abstract
import
Executor
,
FailureCallback
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.worker.worker_base
import
WorkerWrapperBase
...
...
@@ -382,11 +382,11 @@ class WorkerProc:
pp_str
=
f
"PP
{
rank
//
tp_size
}
"
if
pp_size
>
1
else
""
tp_str
=
f
"TP
{
rank
%
tp_size
}
"
if
tp_size
>
1
else
""
suffix
=
f
"
{
pp_str
}{
'_'
if
pp_str
and
tp_str
else
''
}{
tp_str
}
"
process_name
=
"VllmWorker"
if
suffix
:
set_process_title
(
suffix
,
append
=
True
)
pid
=
os
.
getpid
()
_add_prefix
(
sys
.
stdout
,
f
"VllmWorker rank=
{
rank
}
"
,
pid
)
_add_prefix
(
sys
.
stderr
,
f
"VllmWorker rank=
{
rank
}
"
,
pid
)
process_name
=
f
"
{
process_name
}
{
suffix
}
"
decorate_logs
(
process_name
)
# Initialize MessageQueue for receiving SchedulerOutput
self
.
rpc_broadcast_mq
=
MessageQueue
.
create_from_handle
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment