Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
f6d723f6
Commit
f6d723f6
authored
Jan 24, 2025
by
Blazej
Committed by
GitHub
Jan 24, 2025
Browse files
feat: Add vLLM workers to LLM example (#41)
Add example of LLM disaggregated serving
parent
8980ec37
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
9 deletions
+18
-9
worker/src/python/triton_distributed/worker/worker.py
worker/src/python/triton_distributed/worker/worker.py
+18
-9
No files found.
worker/src/python/triton_distributed/worker/worker.py
View file @
f6d723f6
...
...
@@ -37,6 +37,7 @@ from triton_distributed.worker.remote_request import (
RemoteInferenceRequest
,
RemoteResponseSender
,
)
from
triton_distributed.worker.triton_core_operator
import
TritonCoreOperator
if
TYPE_CHECKING
:
import
uvicorn
...
...
@@ -90,6 +91,7 @@ class Worker:
self
.
_metrics_port
=
config
.
metrics_port
self
.
_metrics_server
:
Optional
[
uvicorn
.
Server
]
=
None
self
.
_component_id
=
self
.
_request_plane
.
component_id
self
.
_triton_core
:
Optional
[
tritonserver
.
Server
]
=
None
def
_import_operators
(
self
):
for
operator_config
in
self
.
_operator_configs
:
...
...
@@ -136,6 +138,20 @@ class Worker:
log_level
=
operator_config
.
log_level
,
logger_name
=
f
"OPERATOR
{
(
operator_config
.
name
,
operator_config
.
version
)
}
"
,
)
if
(
class_
==
TritonCoreOperator
or
issubclass
(
class_
,
TritonCoreOperator
)
)
and
not
self
.
_triton_core
:
self
.
_triton_core
=
tritonserver
.
Server
(
model_repository
=
"."
,
log_error
=
True
,
log_verbose
=
self
.
_log_level
,
strict_model_config
=
False
,
model_control_mode
=
tritonserver
.
ModelControlMode
.
EXPLICIT
,
log_file
=
self
.
_triton_log_path
,
).
start
(
wait_until_ready
=
True
)
operator
=
class_
(
operator_config
.
name
,
operator_config
.
version
,
...
...
@@ -229,14 +245,6 @@ class Worker:
async
def
serve
(
self
):
error
=
None
self
.
_triton_core
=
tritonserver
.
Server
(
model_repository
=
"."
,
log_error
=
True
,
log_verbose
=
self
.
_log_level
,
strict_model_config
=
False
,
model_control_mode
=
tritonserver
.
ModelControlMode
.
EXPLICIT
,
log_file
=
self
.
_triton_log_path
,
).
start
(
wait_until_ready
=
True
)
try
:
await
self
.
_request_plane
.
connect
()
except
Exception
as
e
:
...
...
@@ -297,7 +305,8 @@ class Worker:
tasks
=
[
t
for
t
in
asyncio
.
all_tasks
()
if
t
is
not
asyncio
.
current_task
()]
logger
.
info
(
"Cancelling %s outstanding tasks"
,
len
(
tasks
))
[
task
.
cancel
()
for
task
in
tasks
]
self
.
_triton_core
.
stop
()
if
self
.
_triton_core
:
self
.
_triton_core
.
stop
()
if
self
.
_metrics_server
:
self
.
_metrics_server
.
should_exit
=
True
await
self
.
_metrics_server
.
shutdown
()
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment