Commit 8588e33a authored by GuanLuo's avatar GuanLuo Committed by GitHub
Browse files

feat: Add KV publisher and receiver. Add KV aware routing example.


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: aflowers <aflowers@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: Neelay Shah <neelays@nvidia.com>
parent d8aada0b
......@@ -20,6 +20,7 @@ from typing import Any, AsyncGenerator, Callable, Type
from pydantic import BaseModel, ValidationError
from triton_distributed_rs._core import DistributedRuntime
from triton_distributed_rs._core import KvRouter as KvRouter
def triton_worker():
......
from typing import AsyncGenerator, AsyncIterator, Callable
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import AsyncGenerator, AsyncIterator, Callable, List
class JsonLike:
"""
......@@ -54,6 +69,12 @@ class Component:
"""
...
def event_subject(self, name: str) -> str:
    """
    Create an event subject for `name` and return it as a string.

    NOTE(review): presumably the subject is scoped to this component; the
    exact format is produced by the native binding and is not visible in
    this stub -- confirm before relying on its shape.
    """
    ...
class Endpoint:
"""
An Endpoint is a single API endpoint
......@@ -74,6 +95,12 @@ class Endpoint:
"""
...
async def lease_id(self) -> int:
    """
    Return the primary lease id.

    Setting a different lease id is not currently supported.
    """
    ...
class Client:
"""
A client capable of calling served instances of an endpoint
......@@ -98,3 +125,22 @@ class Client:
Pick a specific instance of the endpoint
"""
...
class KvRouter:
    """
    A router that picks the worker that should handle a request, based on the
    request's token ids.
    """

    ...

    def __init__(self, drt: DistributedRuntime, component: Component) -> None:
        """
        Create a `KvRouter` object that is associated with the `component`.

        Args:
            drt: The distributed runtime the router is created with.
            component: The component this router is associated with.
        """
        ...

    def schedule(self, token_ids: List[int], lora_id: int) -> str:
        """
        Return the worker id that should handle the given token ids.

        Args:
            token_ids: The token ids of the request to be routed.
            lora_id: Identifier passed alongside the token ids.
                NOTE(review): presumably selects a LoRA adapter -- confirm
                against the native binding.

        Raises:
            An exception is raised if there is no worker available (the
            concrete exception type is not visible in this stub).
        """
        ...
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::*;
pub mod kv;
This diff is collapsed.
This diff is collapsed.
......@@ -74,3 +74,5 @@ rand = { version = "0.8"}
[dev-dependencies]
assert_matches = "1.5.0"
env_logger = "0.11"
rstest = "0.23.0"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment