"lib/bindings/python/examples/error_handling/client.py" did not exist on "8588e33a464d9f82d6ad93a433590a3bc3ff92de"
Commit f6d723f6, authored by Blazej, committed by GitHub
Browse files

feat: Add vLLM workers to LLM example (#41)

Add example of LLM disaggregated serving
parent 8980ec37
...@@ -37,6 +37,7 @@ from triton_distributed.worker.remote_request import ( ...@@ -37,6 +37,7 @@ from triton_distributed.worker.remote_request import (
RemoteInferenceRequest, RemoteInferenceRequest,
RemoteResponseSender, RemoteResponseSender,
) )
from triton_distributed.worker.triton_core_operator import TritonCoreOperator
if TYPE_CHECKING: if TYPE_CHECKING:
import uvicorn import uvicorn
...@@ -90,6 +91,7 @@ class Worker: ...@@ -90,6 +91,7 @@ class Worker:
self._metrics_port = config.metrics_port self._metrics_port = config.metrics_port
self._metrics_server: Optional[uvicorn.Server] = None self._metrics_server: Optional[uvicorn.Server] = None
self._component_id = self._request_plane.component_id self._component_id = self._request_plane.component_id
self._triton_core: Optional[tritonserver.Server] = None
def _import_operators(self): def _import_operators(self):
for operator_config in self._operator_configs: for operator_config in self._operator_configs:
...@@ -136,6 +138,20 @@ class Worker: ...@@ -136,6 +138,20 @@ class Worker:
log_level=operator_config.log_level, log_level=operator_config.log_level,
logger_name=f"OPERATOR{(operator_config.name,operator_config.version)}", logger_name=f"OPERATOR{(operator_config.name,operator_config.version)}",
) )
if (
class_ == TritonCoreOperator
or issubclass(class_, TritonCoreOperator)
) and not self._triton_core:
self._triton_core = tritonserver.Server(
model_repository=".",
log_error=True,
log_verbose=self._log_level,
strict_model_config=False,
model_control_mode=tritonserver.ModelControlMode.EXPLICIT,
log_file=self._triton_log_path,
).start(wait_until_ready=True)
operator = class_( operator = class_(
operator_config.name, operator_config.name,
operator_config.version, operator_config.version,
...@@ -229,14 +245,6 @@ class Worker: ...@@ -229,14 +245,6 @@ class Worker:
async def serve(self): async def serve(self):
error = None error = None
self._triton_core = tritonserver.Server(
model_repository=".",
log_error=True,
log_verbose=self._log_level,
strict_model_config=False,
model_control_mode=tritonserver.ModelControlMode.EXPLICIT,
log_file=self._triton_log_path,
).start(wait_until_ready=True)
try: try:
await self._request_plane.connect() await self._request_plane.connect()
except Exception as e: except Exception as e:
...@@ -297,7 +305,8 @@ class Worker: ...@@ -297,7 +305,8 @@ class Worker:
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
logger.info("Cancelling %s outstanding tasks", len(tasks)) logger.info("Cancelling %s outstanding tasks", len(tasks))
[task.cancel() for task in tasks] [task.cancel() for task in tasks]
self._triton_core.stop() if self._triton_core:
self._triton_core.stop()
if self._metrics_server: if self._metrics_server:
self._metrics_server.should_exit = True self._metrics_server.should_exit = True
await self._metrics_server.shutdown() await self._metrics_server.shutdown()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment