Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
a092bcf4
Commit
a092bcf4
authored
Mar 12, 2025
by
ptarasiewiczNV
Committed by
GitHub
Mar 12, 2025
Browse files
chore: Reduce conditional prefill logs (#121)
Co-authored-by: ptarasiewicz@nvidia.com <Piotr Tarasiewicz>
parent
1725c02d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
8 deletions
+16
-8
examples/python_rs/llm/vllm/prefill_worker.py
examples/python_rs/llm/vllm/prefill_worker.py
+4
-2
examples/python_rs/llm/vllm/worker.py
examples/python_rs/llm/vllm/worker.py
+12
-6
No files found.
examples/python_rs/llm/vllm/prefill_worker.py
View file @
a092bcf4
...
@@ -89,7 +89,7 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
...
@@ -89,7 +89,7 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
else
"vllm"
else
"vllm"
)
)
vllm_logger
.
info
(
vllm_logger
.
info
(
f
"Prefill queue:
{
prefill_queue_nats_server
}
:
{
prefill_queue_stream_name
}
"
"Prefill queue:
%s:%s"
,
prefill_queue_nats_server
,
prefill_queue_stream_name
)
)
request_handler
=
RequestHandler
(
engine_client
,
metadata_store
)
request_handler
=
RequestHandler
(
engine_client
,
metadata_store
)
...
@@ -104,7 +104,9 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
...
@@ -104,7 +104,9 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
# need to test and check how much overhead it is
# need to test and check how much overhead it is
prefill_request
=
await
prefill_queue
.
dequeue_prefill_request
()
prefill_request
=
await
prefill_queue
.
dequeue_prefill_request
()
if
prefill_request
is
not
None
:
if
prefill_request
is
not
None
:
vllm_logger
.
info
(
f
"Dequeued prefill request:
{
prefill_request
}
"
)
vllm_logger
.
debug
(
"Dequeued prefill request: %s"
,
prefill_request
.
request_id
)
async
for
_
in
request_handler
.
generate
(
prefill_request
):
async
for
_
in
request_handler
.
generate
(
prefill_request
):
pass
pass
...
...
examples/python_rs/llm/vllm/worker.py
View file @
a092bcf4
...
@@ -60,7 +60,9 @@ class RequestHandler:
...
@@ -60,7 +60,9 @@ class RequestHandler:
)
)
self
.
_prefill_queue_stream_name
=
model_name
self
.
_prefill_queue_stream_name
=
model_name
vllm_logger
.
info
(
vllm_logger
.
info
(
f
"Prefill queue:
{
self
.
_prefill_queue_nats_server
}
:
{
self
.
_prefill_queue_stream_name
}
"
"Prefill queue: %s:%s"
,
self
.
_prefill_queue_nats_server
,
self
.
_prefill_queue_stream_name
,
)
)
print
(
"RequestHandler initialized"
)
print
(
"RequestHandler initialized"
)
...
@@ -92,13 +94,17 @@ class RequestHandler:
...
@@ -92,13 +94,17 @@ class RequestHandler:
is_remote_prefill
=
True
,
is_remote_prefill
=
True
,
remote_prefill_request_callback
=
self
.
get_remote_prefill_request_callback
(),
remote_prefill_request_callback
=
self
.
get_remote_prefill_request_callback
(),
)
)
vllm_logger
.
info
(
vllm_logger
.
debug
(
f
"Prefilling remotely for request
{
request
.
request_id
}
with length
{
len
(
request
.
engine_prompt
[
'prompt_token_ids'
])
}
"
"Prefilling remotely for request %s with length %s"
,
request
.
request_id
,
len
(
request
.
engine_prompt
[
"prompt_token_ids"
]),
)
)
else
:
else
:
remote_prefill_params
=
None
remote_prefill_params
=
None
vllm_logger
.
info
(
vllm_logger
.
debug
(
f
"Prefilling locally for request
{
request
.
request_id
}
with length
{
len
(
request
.
engine_prompt
[
'prompt_token_ids'
])
}
"
"Prefilling locally for request %s with length %s"
,
request
.
request_id
,
len
(
request
.
engine_prompt
[
"prompt_token_ids"
]),
)
)
# rust HTTP requires Delta streaming
# rust HTTP requires Delta streaming
...
@@ -141,7 +147,7 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
...
@@ -141,7 +147,7 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
# TODO: do we need these env vars?
# TODO: do we need these env vars?
VLLM_WORKER_ID
=
endpoint
.
lease_id
()
VLLM_WORKER_ID
=
endpoint
.
lease_id
()
os
.
environ
[
"VLLM_WORKER_ID"
]
=
str
(
VLLM_WORKER_ID
)
os
.
environ
[
"VLLM_WORKER_ID"
]
=
str
(
VLLM_WORKER_ID
)
vllm_logger
.
info
(
f
"Generate endpoint ID:
{
VLLM_WORKER_ID
}
"
)
vllm_logger
.
info
(
"Generate endpoint ID:
%s"
,
VLLM_WORKER_ID
)
VLLM_KV_NAMESPACE
=
"dynamo-init"
VLLM_KV_NAMESPACE
=
"dynamo-init"
os
.
environ
[
"VLLM_KV_NAMESPACE"
]
=
str
(
VLLM_KV_NAMESPACE
)
os
.
environ
[
"VLLM_KV_NAMESPACE"
]
=
str
(
VLLM_KV_NAMESPACE
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment