Commit 678cffb4, authored by Neelay Shah, committed by GitHub
Browse files
parent 6ba39b09
......@@ -21,7 +21,7 @@ Run this example using command below
cd /workspace/examples/python_rs/llm/vllm
dynemo-sdk serve sdk_kv_router.frontend:Frontend
dynamo-sdk serve sdk_kv_router.frontend:Frontend
```
......@@ -35,4 +35,4 @@ curl -X 'POST' \
-d '{
"msg": "In the heart of Eldoria, an ancient land of boundless magic and mysterious creatures, lies the long-forgotten city of Aeloria. Once a beacon of knowledge and power, Aeloria was buried beneath the shifting sands of time, lost to the world for centuries. You are an intrepid explorer, known for your unparalleled curiosity and courage, who has stumbled upon an ancient map hinting at ests that Aeloria holds a secret so profound that it has the potential to reshape the very fabric of reality. Your journey will take you through treacherous deserts, enchanted forests, and across perilous mountain ranges. Your Task: Character Background: Develop a detailed background for your character. Describe their motivations for seeking out Aeloria, their skills and weaknesses, and any personal connections to the ancient city or its legends. Are they driven by a quest for knowledge, a search for lost familt clue is hidden."
}'
````
\ No newline at end of file
```
......@@ -13,11 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from dynemo.sdk import DYNEMO_IMAGE, api, depends, service
from sdk_kv_router.processor import Processor
from dynamo.sdk import DYNAMO_IMAGE, api, depends, service
@service(traffic={"timeout": 10000}, image=DYNEMO_IMAGE)
@service(traffic={"timeout": 10000}, image=DYNAMO_IMAGE)
class Frontend:
processor = depends(Processor)
......
......@@ -29,13 +29,13 @@ with bentoml.importing():
from common.chat_processor import ChatProcessor, ProcessMixIn
from common.protocol import MyRequestOutput, Tokens, vLLMGenerateRequest
from dynemo.sdk import depends, dynemo_context, dynemo_endpoint, service
from dynamo.sdk import depends, dynamo_context, dynamo_endpoint, service
@service(
dynemo={
dynamo={
"enabled": True,
"namespace": "dynemo",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -92,7 +92,7 @@ class Processor(ProcessMixIn):
metrics=output.metrics,
)
@dynemo_endpoint()
@dynamo_endpoint()
async def generate(self, raw_request: ChatCompletionRequest):
request_id = str(uuid.uuid4())
(
......@@ -108,8 +108,8 @@ class Processor(ProcessMixIn):
):
worker_id = worker
break
runtime = dynemo_context["runtime"]
comp_ns, comp_name = VllmEngine.dynemo_address() # type: ignore
runtime = dynamo_context["runtime"]
comp_ns, comp_name = VllmEngine.dynamo_address() # type: ignore
worker_client = (
await runtime.namespace(comp_ns)
.component(comp_name)
......
......@@ -17,10 +17,11 @@ from enum import Enum
import bentoml
from common.protocol import Tokens
from dynemo.sdk import async_onstart, dynemo_context, dynemo_endpoint, service
from dynamo.sdk import async_onstart, dynamo_context, dynamo_endpoint, service
with bentoml.importing():
from dynemo.runtime import KvRouter
from dynamo.runtime import KvRouter
WorkerId = str
......@@ -33,9 +34,9 @@ class RoutingStrategy(Enum):
@service(
dynemo={
dynamo={
"enabled": True,
"namespace": "dynemo",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -48,13 +49,13 @@ class Router:
def __init__(self):
self.model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
self.routing_strategy = RoutingStrategy.PREFIX
self.runtime = dynemo_context["runtime"]
self.runtime = dynamo_context["runtime"]
self.min_workers = 1
@async_onstart
async def init_engine(self):
workers_client = (
await self.runtime.namespace("dynemo")
await self.runtime.namespace("dynamo")
.component("VllmEngine")
.endpoint("generate")
.client()
......@@ -74,11 +75,11 @@ class Router:
)
await asyncio.sleep(5)
kv_listener = self.runtime.namespace("dynemo").component(self.model_name)
kv_listener = self.runtime.namespace("dynamo").component(self.model_name)
await kv_listener.create_service()
self.router = KvRouter(self.runtime, kv_listener)
@dynemo_endpoint()
@dynamo_endpoint()
async def generate(self, request: Tokens):
lora_id = 0
worker_id = ""
......
......@@ -27,11 +27,11 @@ with bentoml.importing():
from common.protocol import MyRequestOutput, vLLMGenerateRequest
from vllm.engine.multiprocessing.client import MQLLMEngineClient
from dynemo.llm import KvMetricsPublisher
from dynemo.sdk import (
from dynamo.llm import KvMetricsPublisher
from dynamo.sdk import (
async_onstart,
dynemo_context,
dynemo_endpoint,
dynamo_context,
dynamo_endpoint,
server_context,
service,
)
......@@ -42,9 +42,9 @@ lease_id = None
@service(
dynemo={
dynamo={
"enabled": True,
"namespace": "dynemo",
"namespace": "dynamo",
},
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -63,9 +63,9 @@ class VllmEngine(BaseVllmEngine):
block_size=64,
max_model_len=16384,
)
VLLM_WORKER_ID = dynemo_context["endpoints"][0].lease_id()
VLLM_WORKER_ID = dynamo_context["endpoints"][0].lease_id()
os.environ["VLLM_WORKER_ID"] = str(VLLM_WORKER_ID)
os.environ["VLLM_KV_NAMESPACE"] = "dynemo"
os.environ["VLLM_KV_NAMESPACE"] = "dynamo"
os.environ["VLLM_KV_COMPONENT"] = "vllm"
vllm_logger.info(f"Generate endpoint ID: {VLLM_WORKER_ID}")
os.environ["CUDA_VISIBLE_DEVICES"] = f"{server_context.worker_index - 1}"
......@@ -74,7 +74,7 @@ class VllmEngine(BaseVllmEngine):
super().__init__(self.engine_args)
async def create_metrics_publisher_endpoint(self):
component = dynemo_context["component"]
component = dynamo_context["component"]
await self.metrics_publisher.create_endpoint(component)
@async_onstart
......@@ -88,7 +88,7 @@ class VllmEngine(BaseVllmEngine):
task = asyncio.create_task(self.create_metrics_publisher_endpoint())
task.add_done_callback(lambda _: print("metrics publisher endpoint created"))
@dynemo_endpoint()
@dynamo_endpoint()
async def generate(self, request: vLLMGenerateRequest):
sampling_params = request.sampling_params
# rust HTTP requires Delta streaming
......
......@@ -27,8 +27,8 @@ version = "0.2.0"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
[workspace.dependencies]
......
......@@ -27,7 +27,7 @@ Annotated { data: Some("o"), id: None, event: None, comment: None }
Annotated { data: Some("r"), id: None, event: None, comment: None }
Annotated { data: Some("l"), id: None, event: None, comment: None }
Annotated { data: Some("d"), id: None, event: None, comment: None }
ServiceSet { services: [ServiceInfo { name: "dynemo_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "dynemo_init_backend_720278f8-generate-694d951a80e06abf", subject: "dynemo_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
ServiceSet { services: [ServiceInfo { name: "dynamo_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "dynamo_init_backend_720278f8-generate-694d951a80e06abf", subject: "dynamo_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
```
Note the following stats in the output demonstrate the custom
......
......@@ -18,7 +18,7 @@ name = "dynamo-run"
version = "0.1.0"
edition = "2021"
authors = ["NVIDIA"]
homepage = "https://github.com/dynemo-ai/dynemo"
homepage = "https://github.com/ai-dynamo/dynamo"
license = "Apache-2.0"
[features]
......
......@@ -19,8 +19,8 @@ version = "0.2.1"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
[lib]
name = "dynamo_llm_capi"
......
......@@ -15,7 +15,7 @@
language = "C++"
cpp_compat = true
include_guard = "__NVIDIA_DYNEMO_LLM_API__"
include_guard = "__NVIDIA_DYNAMO_LLM_API__"
[enum]
......
......@@ -19,8 +19,8 @@ version = "0.2.1"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
[lib]
path = "rust/lib.rs"
......
......@@ -18,8 +18,8 @@ version = "0.2.1"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
[package]
name = "dynamo-llm"
......
......@@ -21,8 +21,8 @@ version = "0.2.1" # TODO: Centralize Version Automation
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"]
[features]
......
......@@ -19,7 +19,7 @@ version = "0.2.1"
description = "Distributed Inference Framework"
readme = "README.md"
authors = [
{ name = "NVIDIA Inc.", email = "sw-dl-dynemo@nvidia.com" },
{ name = "NVIDIA Inc.", email = "sw-dl-dynamo@nvidia.com" },
]
license = { file = "LICENSE" }
requires-python = ">=3.10"
......@@ -146,4 +146,4 @@ ignore_missing_imports = true
# declare namespace packages
[tool.setuptools]
namespace-packages = ["dynemo", "dynemo.sdk"]
namespace-packages = ["dynamo", "dynamo.sdk"]
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment