Commit 8588e33a authored by GuanLuo, committed by GitHub
Browse files

feat: Add KV publisher and receiver. Add KV aware routing example.


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: aflowers <aflowers@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: Neelay Shah <neelays@nvidia.com>
parent d8aada0b
...@@ -20,6 +20,7 @@ from typing import Any, AsyncGenerator, Callable, Type ...@@ -20,6 +20,7 @@ from typing import Any, AsyncGenerator, Callable, Type
from pydantic import BaseModel, ValidationError from pydantic import BaseModel, ValidationError
from triton_distributed_rs._core import DistributedRuntime from triton_distributed_rs._core import DistributedRuntime
from triton_distributed_rs._core import KvRouter as KvRouter
def triton_worker(): def triton_worker():
......
from typing import AsyncGenerator, AsyncIterator, Callable # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import AsyncGenerator, AsyncIterator, Callable, List
class JsonLike: class JsonLike:
""" """
...@@ -54,6 +69,12 @@ class Component: ...@@ -54,6 +69,12 @@ class Component:
""" """
... ...
def event_subject(self, name: str) -> str:
"""
Create an event subject for `name` and return it as a string.

NOTE(review): the exact subject format, and how it is scoped to this
component, is decided by the native implementation - confirm there.
"""
...
class Endpoint: class Endpoint:
""" """
An Endpoint is a single API endpoint An Endpoint is a single API endpoint
...@@ -74,6 +95,12 @@ class Endpoint: ...@@ -74,6 +95,12 @@ class Endpoint:
""" """
... ...
async def lease_id(self) -> int:
"""
Return the primary lease id.

Setting a different lease id is not currently supported.
"""
...
class Client: class Client:
""" """
A client capable of calling served instances of an endpoint A client capable of calling served instances of an endpoint
...@@ -98,3 +125,22 @@ class Client: ...@@ -98,3 +125,22 @@ class Client:
Pick a specific instance of the endpoint Pick a specific instance of the endpoint
""" """
... ...
class KvRouter:
    """
    A KV-aware router that picks the worker which should handle a request,
    based on the request's token ids
    """

    ...

    def __init__(self, drt: DistributedRuntime, component: Component) -> None:
        """
        Create a `KvRouter` object that is associated with the `component`
        """
        ...

    def schedule(self, token_ids: List[int], lora_id: int) -> str:
        """
        Return the worker id that should handle the given token ids.

        An exception is raised if there is no worker available.

        NOTE(review): `lora_id` presumably identifies the LoRA adapter of
        the request - confirm against the native implementation.
        """
        ...
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::*;
pub mod kv;
This diff is collapsed.
This diff is collapsed.
...@@ -74,3 +74,5 @@ rand = { version = "0.8"} ...@@ -74,3 +74,5 @@ rand = { version = "0.8"}
[dev-dependencies] [dev-dependencies]
assert_matches = "1.5.0" assert_matches = "1.5.0"
env_logger = "0.11"
rstest = "0.23.0"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment