Commit 602352ce authored by Neelay Shah, committed by GitHub
Browse files

chore: rename dynamo (#44)


Co-authored-by: Biswa Panda <biswa.panda@gmail.com>
parent ecf53ce2
...@@ -14,31 +14,31 @@ ...@@ -14,31 +14,31 @@
# limitations under the License. # limitations under the License.
[project] [project]
name = "dynemo-sdk" name = "dynamo-sdk"
version = "0.1.0" version = "0.1.0"
description = "Distributed Inference Framework - SDK" description = "Distributed Inference Framework - SDK"
readme = "README.md" readme = "README.md"
authors = [ authors = [
{ name = "NVIDIA Inc.", email = "sw-dl-dynemo@nvidia.com" }, { name = "NVIDIA Inc.", email = "sw-dl-dynamo@nvidia.com" },
] ]
dependencies = [ dependencies = [
# TODO: currently compatible with bentoml==1.4.1, # TODO: currently compatible with bentoml==1.4.1,
# v1.4.2 has removed normalize_identifier which is used in compoundai # v1.4.2 has removed normalize_identifier which is used in dynamo
# "dynemo=={dynemo_version}", # "dynamo=={dynamo_version}",
"bentoml==1.4.1", "bentoml==1.4.1",
"types-psutil==7.0.0.20250218", "types-psutil==7.0.0.20250218",
] ]
[project.scripts] [project.scripts]
dynemo-sdk = "dynemo.sdk.cli.cli:cli" dynamo-sdk = "dynamo.sdk.cli.cli:cli"
[tool.setuptools] [tool.setuptools]
namespace-packages = ["dynemo"] namespace-packages = ["dynamo"]
package-dir = {"dynemo.sdk" = "src/dynemo/sdk"} package-dir = {"dynamo.sdk" = "src/dynamo/sdk"}
[build-system] [build-system]
requires = ["hatchling"] requires = ["hatchling"]
build-backend = "hatchling.build" build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel] [tool.hatch.build.targets.wheel]
packages = ["src/dynemo"] packages = ["src/dynamo"]
\ No newline at end of file \ No newline at end of file
...@@ -18,21 +18,21 @@ from typing import Any ...@@ -18,21 +18,21 @@ from typing import Any
from bentoml import api # type: ignore from bentoml import api # type: ignore
from bentoml._internal.context import server_context # type: ignore from bentoml._internal.context import server_context # type: ignore
from dynemo.sdk.lib.decorators import async_onstart, dynemo_api, dynemo_endpoint from dynamo.sdk.lib.decorators import async_onstart, dynamo_api, dynamo_endpoint
from dynemo.sdk.lib.dependency import depends from dynamo.sdk.lib.dependency import depends
from dynemo.sdk.lib.image import DYNEMO_IMAGE from dynamo.sdk.lib.image import DYNEMO_IMAGE
from dynemo.sdk.lib.service import service from dynamo.sdk.lib.service import service
dynemo_context: dict[str, Any] = {} dynamo_context: dict[str, Any] = {}
__all__ = [ __all__ = [
"api", "api",
"server_context", "server_context",
"async_onstart", "async_onstart",
"dynemo_api", "dynamo_api",
"dynemo_endpoint", "dynamo_endpoint",
"depends", "depends",
"DYNEMO_IMAGE", "DYNEMO_IMAGE",
"service", "service",
"dynemo_context", "dynamo_context",
] ]
...@@ -35,8 +35,8 @@ def create_bentoml_cli() -> click.Command: ...@@ -35,8 +35,8 @@ def create_bentoml_cli() -> click.Command:
from bentoml_cli.secret import secret_command from bentoml_cli.secret import secret_command
from bentoml_cli.utils import BentoMLCommandGroup, get_entry_points from bentoml_cli.utils import BentoMLCommandGroup, get_entry_points
from dynemo.sdk.cli.serve import serve_command from dynamo.sdk.cli.serve import serve_command
from dynemo.sdk.cli.start import start_command from dynamo.sdk.cli.start import start_command
server_context.service_type = "cli" server_context.service_type = "cli"
......
...@@ -318,7 +318,7 @@ def build_serve_command() -> click.Group: ...@@ -318,7 +318,7 @@ def build_serve_command() -> click.Group:
else: else:
# bentoml>=1.2 # bentoml>=1.2
# from _bentoml_impl.server import serve_http # from _bentoml_impl.server import serve_http
from dynemo.sdk.cli.serving import serve_http # type: ignore from dynamo.sdk.cli.serving import serve_http # type: ignore
svc.inject_config() svc.inject_config()
serve_http( serve_http(
......
...@@ -27,10 +27,10 @@ from typing import Any ...@@ -27,10 +27,10 @@ from typing import Any
import click import click
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker from dynamo.runtime import DistributedRuntime, dynamo_endpoint, dynamo_worker
from dynemo.sdk import dynemo_context from dynamo.sdk import dynamo_context
logger = logging.getLogger("dynemo.sdk.serve.dynemo") logger = logging.getLogger("dynamo.sdk.serve.dynamo")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
...@@ -65,16 +65,16 @@ def main( ...@@ -65,16 +65,16 @@ def main(
worker_env: str | None, worker_env: str | None,
worker_id: int | None, worker_id: int | None,
) -> None: ) -> None:
"""Start a worker for the given service - either Dynemo or regular service""" """Start a worker for the given service - either Dynamo or regular service"""
from _bentoml_impl.loader import import_service from _bentoml_impl.loader import import_service
from bentoml._internal.container import BentoMLContainer from bentoml._internal.container import BentoMLContainer
from bentoml._internal.context import server_context from bentoml._internal.context import server_context
from bentoml._internal.log import configure_server_logging from bentoml._internal.log import configure_server_logging
run_id = generate_run_id() run_id = generate_run_id()
dynemo_context["service_name"] = service_name dynamo_context["service_name"] = service_name
dynemo_context["runner_map"] = runner_map dynamo_context["runner_map"] = runner_map
dynemo_context["worker_id"] = worker_id dynamo_context["worker_id"] = worker_id
# Import service first to check configuration # Import service first to check configuration
service = import_service(bento_identifier) service = import_service(bento_identifier)
...@@ -99,15 +99,15 @@ def main( ...@@ -99,15 +99,15 @@ def main(
t.cast(t.Dict[str, str], json.loads(runner_map)) t.cast(t.Dict[str, str], json.loads(runner_map))
) )
# Check if Dynemo is enabled for this service # Check if Dynamo is enabled for this service
if service.is_dynemo_component(): if service.is_dynamo_component():
if worker_id is not None: if worker_id is not None:
server_context.worker_index = worker_id server_context.worker_index = worker_id
@dynemo_worker() @dynamo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
global dynemo_context global dynamo_context
dynemo_context["runtime"] = runtime dynamo_context["runtime"] = runtime
if service_name and service_name != service.name: if service_name and service_name != service.name:
server_context.service_type = "service" server_context.service_type = "service"
else: else:
...@@ -115,8 +115,8 @@ def main( ...@@ -115,8 +115,8 @@ def main(
server_context.service_name = service.name server_context.service_name = service.name
# Get Dynemo configuration and create component # Get Dynamo configuration and create component
namespace, component_name = service.dynemo_address() namespace, component_name = service.dynamo_address()
logger.info( logger.info(
f"[{run_id}] Registering component {namespace}/{component_name}" f"[{run_id}] Registering component {namespace}/{component_name}"
) )
...@@ -132,32 +132,32 @@ def main( ...@@ -132,32 +132,32 @@ def main(
dep.set_runtime(runtime) dep.set_runtime(runtime)
logger.info(f"[{run_id}] Set runtime for dependency: {dep}") logger.info(f"[{run_id}] Set runtime for dependency: {dep}")
# Then register all Dynemo endpoints # Then register all Dynamo endpoints
dynemo_endpoints = service.get_dynemo_endpoints() dynamo_endpoints = service.get_dynamo_endpoints()
if not dynemo_endpoints: if not dynamo_endpoints:
error_msg = f"[{run_id}] FATAL ERROR: No Dynemo endpoints found in service {service.name}!" error_msg = f"[{run_id}] FATAL ERROR: No Dynamo endpoints found in service {service.name}!"
logger.error(error_msg) logger.error(error_msg)
raise ValueError(error_msg) raise ValueError(error_msg)
endpoints = [] endpoints = []
for name, endpoint in dynemo_endpoints.items(): for name, endpoint in dynamo_endpoints.items():
td_endpoint = component.endpoint(name) td_endpoint = component.endpoint(name)
logger.info(f"[{run_id}] Registering endpoint '{name}'") logger.info(f"[{run_id}] Registering endpoint '{name}'")
endpoints.append(td_endpoint) endpoints.append(td_endpoint)
# Bind an instance of inner to the endpoint # Bind an instance of inner to the endpoint
dynemo_context["component"] = component dynamo_context["component"] = component
dynemo_context["endpoints"] = endpoints dynamo_context["endpoints"] = endpoints
class_instance = service.inner() class_instance = service.inner()
twm = [] twm = []
for name, endpoint in dynemo_endpoints.items(): for name, endpoint in dynamo_endpoints.items():
bound_method = endpoint.func.__get__(class_instance) bound_method = endpoint.func.__get__(class_instance)
# Only pass request type for now, use Any for response # Only pass request type for now, use Any for response
# TODO: Handle a dynemo_endpoint not having types # TODO: Handle a dynamo_endpoint not having types
# TODO: Handle multiple endpoints in a single component # TODO: Handle multiple endpoints in a single component
dynemo_wrapped_method = dynemo_endpoint(endpoint.request_type, Any)( dynamo_wrapped_method = dynamo_endpoint(endpoint.request_type, Any)(
bound_method bound_method
) )
twm.append(dynemo_wrapped_method) twm.append(dynamo_wrapped_method)
# Run startup hooks before setting up endpoints # Run startup hooks before setting up endpoints
for name, member in vars(class_instance.__class__).items(): for name, member in vars(class_instance.__class__).items():
if callable(member) and getattr( if callable(member) and getattr(
...@@ -180,7 +180,7 @@ def main( ...@@ -180,7 +180,7 @@ def main(
result = await endpoints[0].serve_endpoint(twm[0]) result = await endpoints[0].serve_endpoint(twm[0])
except Exception as e: except Exception as e:
logger.error(f"[{run_id}] Error in Dynemo component setup: {str(e)}") logger.error(f"[{run_id}] Error in Dynamo component setup: {str(e)}")
raise raise
asyncio.run(worker()) asyncio.run(worker())
......
...@@ -47,7 +47,7 @@ class ServiceProtocol(Protocol): ...@@ -47,7 +47,7 @@ class ServiceProtocol(Protocol):
models: list[Any] models: list[Any]
bento: Any bento: Any
def is_dynemo_component(self) -> bool: def is_dynamo_component(self) -> bool:
... ...
...@@ -155,7 +155,7 @@ def create_dependency_watcher( ...@@ -155,7 +155,7 @@ def create_dependency_watcher(
return watcher, socket, uri return watcher, socket, uri
def create_dynemo_watcher( def create_dynamo_watcher(
bento_identifier: str, bento_identifier: str,
svc: ServiceProtocol, svc: ServiceProtocol,
uds_path: str, uds_path: str,
...@@ -165,7 +165,7 @@ def create_dynemo_watcher( ...@@ -165,7 +165,7 @@ def create_dynemo_watcher(
working_dir: Optional[str] = None, working_dir: Optional[str] = None,
env: Optional[Dict[str, str]] = None, env: Optional[Dict[str, str]] = None,
) -> tuple[Watcher, CircusSocket, str]: ) -> tuple[Watcher, CircusSocket, str]:
"""Create a watcher for a Dynemo service in the dependency graph""" """Create a watcher for a Dynamo service in the dependency graph"""
from bentoml.serving import create_watcher from bentoml.serving import create_watcher
# Get socket for this service # Get socket for this service
...@@ -174,10 +174,10 @@ def create_dynemo_watcher( ...@@ -174,10 +174,10 @@ def create_dynemo_watcher(
# Get worker configuration # Get worker configuration
num_workers, worker_envs = scheduler.get_worker_env(svc) num_workers, worker_envs = scheduler.get_worker_env(svc)
# Create Dynemo-specific worker args # Create Dynamo-specific worker args
args = [ args = [
"-m", "-m",
"dynemo.sdk.cli.serve_dynemo", # Use our Dynemo worker module "dynamo.sdk.cli.serve_dynamo", # Use our Dynamo worker module
bento_identifier, bento_identifier,
"--service-name", "--service-name",
svc.name, svc.name,
...@@ -190,7 +190,7 @@ def create_dynemo_watcher( ...@@ -190,7 +190,7 @@ def create_dynemo_watcher(
# Create the watcher with dependency map in environment # Create the watcher with dependency map in environment
watcher = create_watcher( watcher = create_watcher(
name=f"dynemo_service_{svc.name}", name=f"dynamo_service_{svc.name}",
args=args, args=args,
numprocesses=num_workers, numprocesses=num_workers,
working_dir=working_dir, working_dir=working_dir,
...@@ -306,12 +306,12 @@ def serve_http( ...@@ -306,12 +306,12 @@ def serve_http(
if name in dependency_map: if name in dependency_map:
continue continue
# Check if this is a Dynemo service # Check if this is a Dynamo service
if ( if (
hasattr(dep_svc, "is_dynemo_component") hasattr(dep_svc, "is_dynamo_component")
and dep_svc.is_dynemo_component() and dep_svc.is_dynamo_component()
): ):
new_watcher, new_socket, uri = create_dynemo_watcher( new_watcher, new_socket, uri = create_dynamo_watcher(
bento_id, bento_id,
dep_svc, dep_svc,
uds_path, uds_path,
...@@ -354,7 +354,7 @@ def serve_http( ...@@ -354,7 +354,7 @@ def serve_http(
except ValueError as e: except ValueError as e:
raise BentoMLConfigException(f"Invalid host IP address: {host}") from e raise BentoMLConfigException(f"Invalid host IP address: {host}") from e
if not svc.is_dynemo_component(): if not svc.is_dynamo_component():
sockets.append( sockets.append(
CircusSocket( CircusSocket(
name=API_SERVER_NAME, name=API_SERVER_NAME,
...@@ -405,12 +405,12 @@ def serve_http( ...@@ -405,12 +405,12 @@ def serve_http(
scheme = "https" if BentoMLContainer.ssl.enabled.get() else "http" scheme = "https" if BentoMLContainer.ssl.enabled.get() else "http"
# Check if this is a Dynemo service # Check if this is a Dynamo service
if hasattr(svc, "is_dynemo_component") and svc.is_dynemo_component(): if hasattr(svc, "is_dynamo_component") and svc.is_dynamo_component():
# Create Dynemo-specific watcher using existing socket # Create Dynamo-specific watcher using existing socket
args = [ args = [
"-m", "-m",
"dynemo.sdk.cli.serve_dynemo", # Use our Dynemo worker module "dynamo.sdk.cli.serve_dynamo", # Use our Dynamo worker module
bento_identifier, bento_identifier,
"--service-name", "--service-name",
svc.name, svc.name,
...@@ -418,7 +418,7 @@ def serve_http( ...@@ -418,7 +418,7 @@ def serve_http(
"$(CIRCUS.WID)", "$(CIRCUS.WID)",
] ]
watcher = create_watcher( watcher = create_watcher(
name=f"dynemo_service_{svc.name}", name=f"dynamo_service_{svc.name}",
args=args, args=args,
numprocesses=num_workers, numprocesses=num_workers,
working_dir=str(bento_path.absolute()), working_dir=str(bento_path.absolute()),
...@@ -426,7 +426,7 @@ def serve_http( ...@@ -426,7 +426,7 @@ def serve_http(
env=env, # Dependency map will be injected by serve_http env=env, # Dependency map will be injected by serve_http
) )
watchers.append(watcher) watchers.append(watcher)
print(f"dynemo_service_{svc.name} entrypoint created") print(f"dynamo_service_{svc.name} entrypoint created")
else: else:
# Create regular BentoML service watcher # Create regular BentoML service watcher
watchers.append( watchers.append(
...@@ -473,14 +473,14 @@ def serve_http( ...@@ -473,14 +473,14 @@ def serve_http(
arbiter.exit_stack.callback(shutil.rmtree, uds_path, ignore_errors=True) arbiter.exit_stack.callback(shutil.rmtree, uds_path, ignore_errors=True)
arbiter.start( arbiter.start(
cb=lambda _: logger.info( # type: ignore cb=lambda _: logger.info( # type: ignore
"Starting Dynemo Service %s (%s/%s) listening on %s://%s:%d (Press CTRL+C to quit)" "Starting Dynamo Service %s (%s/%s) listening on %s://%s:%d (Press CTRL+C to quit)"
if (hasattr(svc, "is_dynemo_component") and svc.is_dynemo_component()) if (hasattr(svc, "is_dynamo_component") and svc.is_dynamo_component())
else 'Starting production %s BentoServer from "%s" (Press CTRL+C to quit)', else 'Starting production %s BentoServer from "%s" (Press CTRL+C to quit)',
*( *(
(svc.name, *svc.dynemo_address(), scheme, log_host, port) (svc.name, *svc.dynamo_address(), scheme, log_host, port)
if ( if (
hasattr(svc, "is_dynemo_component") hasattr(svc, "is_dynamo_component")
and svc.is_dynemo_component() and svc.is_dynamo_component()
) )
else (scheme.upper(), bento_identifier) else (scheme.upper(), bento_identifier)
), ),
......
...@@ -238,7 +238,7 @@ def build_start_command() -> click.Group: ...@@ -238,7 +238,7 @@ def build_start_command() -> click.Group:
) )
else: else:
# for >=1.2 bentos # for >=1.2 bentos
from dynemo.sdk.cli.serving import serve_http from dynamo.sdk.cli.serving import serve_http
print(f"Starting service {service_name}") print(f"Starting service {service_name}")
svc.inject_config() svc.inject_config()
......
...@@ -23,13 +23,13 @@ import bentoml ...@@ -23,13 +23,13 @@ import bentoml
from pydantic import BaseModel from pydantic import BaseModel
class DynemoEndpoint: class DynamoEndpoint:
"""Decorator class for Dynemo endpoints""" """Decorator class for Dynamo endpoints"""
def __init__(self, func: t.Callable, name: str | None = None): def __init__(self, func: t.Callable, name: str | None = None):
self.func = func self.func = func
self.name = name or func.__name__ self.name = name or func.__name__
self.is_dynemo_endpoint = True self.is_dynamo_endpoint = True
# Extract request type from hints # Extract request type from hints
hints = get_type_hints(func) hints = get_type_hints(func)
...@@ -50,7 +50,7 @@ class DynemoEndpoint: ...@@ -50,7 +50,7 @@ class DynemoEndpoint:
if isinstance(args[1], (str, dict)): if isinstance(args[1], (str, dict)):
args[1] = self.request_type.parse_obj(args[1]) # type: ignore args[1] = self.request_type.parse_obj(args[1]) # type: ignore
# Convert Pydantic model to dict before passing to dynemo # Convert Pydantic model to dict before passing to dynamo
if len(args) > 1 and isinstance(args[1], BaseModel): if len(args) > 1 and isinstance(args[1], BaseModel):
args = list(args) # type: ignore args = list(args) # type: ignore
args[1] = args[1].model_dump() # type: ignore args[1] = args[1].model_dump() # type: ignore
...@@ -58,31 +58,31 @@ class DynemoEndpoint: ...@@ -58,31 +58,31 @@ class DynemoEndpoint:
return await self.func(*args, **kwargs) return await self.func(*args, **kwargs)
def dynemo_endpoint( def dynamo_endpoint(
name: str | None = None, name: str | None = None,
) -> t.Callable[[t.Callable], DynemoEndpoint]: ) -> t.Callable[[t.Callable], DynamoEndpoint]:
"""Decorator for Dynemo endpoints. """Decorator for Dynamo endpoints.
Args: Args:
name: Optional name for the endpoint. Defaults to function name. name: Optional name for the endpoint. Defaults to function name.
Example: Example:
@dynemo_endpoint() @dynamo_endpoint()
def my_endpoint(self, input: str) -> str: def my_endpoint(self, input: str) -> str:
return input return input
@dynemo_endpoint(name="custom_name") @dynamo_endpoint(name="custom_name")
def another_endpoint(self, input: str) -> str: def another_endpoint(self, input: str) -> str:
return input return input
""" """
def decorator(func: t.Callable) -> DynemoEndpoint: def decorator(func: t.Callable) -> DynamoEndpoint:
return DynemoEndpoint(func, name) return DynamoEndpoint(func, name)
return decorator return decorator
def dynemo_api(func: t.Callable) -> t.Callable: def dynamo_api(func: t.Callable) -> t.Callable:
"""Decorator for BentoML API endpoints. """Decorator for BentoML API endpoints.
Args: Args:
......
...@@ -19,30 +19,30 @@ from typing import Any, Dict, Optional, TypeVar ...@@ -19,30 +19,30 @@ from typing import Any, Dict, Optional, TypeVar
from _bentoml_sdk.service import Service from _bentoml_sdk.service import Service
from _bentoml_sdk.service.dependency import Dependency from _bentoml_sdk.service.dependency import Dependency
from dynemo.sdk.lib.service import CompoundService from dynamo.sdk.lib.service import DynamoService
T = TypeVar("T") T = TypeVar("T")
class DynemoClient: class DynamoClient:
"""Client for calling Dynemo endpoints with streaming support""" """Client for calling Dynamo endpoints with streaming support"""
def __init__(self, service: CompoundService[Any]): def __init__(self, service: DynamoService[Any]):
self._service = service self._service = service
self._endpoints = service.get_dynemo_endpoints() self._endpoints = service.get_dynamo_endpoints()
self._dynemo_clients: Dict[str, Any] = {} self._dynamo_clients: Dict[str, Any] = {}
self._runtime = None self._runtime = None
def __getattr__(self, name: str) -> Any: def __getattr__(self, name: str) -> Any:
if name not in self._endpoints: if name not in self._endpoints:
raise AttributeError( raise AttributeError(
f"No Dynemo endpoint '{name}' found on service '{self._service.name}'. " f"No Dynamo endpoint '{name}' found on service '{self._service.name}'. "
f"Available endpoints: {list(self._endpoints.keys())}" f"Available endpoints: {list(self._endpoints.keys())}"
) )
# For streaming endpoints, create/cache the stream function # For streaming endpoints, create/cache the stream function
if name not in self._dynemo_clients: if name not in self._dynamo_clients:
namespace, component_name = self._service.dynemo_address() namespace, component_name = self._service.dynamo_address()
# Create async generator function that uses Queue for streaming # Create async generator function that uses Queue for streaming
async def get_stream(*args, **kwargs): async def get_stream(*args, **kwargs):
...@@ -61,8 +61,9 @@ class DynemoClient: ...@@ -61,8 +61,9 @@ class DynemoClient:
# TODO: Potentially model dump for a user here so they can pass around Pydantic models # TODO: Potentially model dump for a user here so they can pass around Pydantic models
stream = await client.generate(*args, **kwargs) stream = await client.generate(*args, **kwargs)
print("here8", stream, flush=True)
async for item in stream: async for item in stream:
print(item, flush=True)
data = item.data() data = item.data()
print(f"Item data: {data}") print(f"Item data: {data}")
await queue.put(data) await queue.put(data)
...@@ -72,10 +73,10 @@ class DynemoClient: ...@@ -72,10 +73,10 @@ class DynemoClient:
raise raise
else: else:
# Create dynemo worker if no runtime # Create dynamo worker if no runtime
from dynemo.runtime import DistributedRuntime, dynemo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
@dynemo_worker() @dynamo_worker()
async def stream_worker(runtime: DistributedRuntime): async def stream_worker(runtime: DistributedRuntime):
try: try:
# Store runtime for future use # Store runtime for future use
...@@ -89,8 +90,9 @@ class DynemoClient: ...@@ -89,8 +90,9 @@ class DynemoClient:
) )
stream = await client.generate(*args, **kwargs) stream = await client.generate(*args, **kwargs)
print(stream, flush=True)
async for item in stream: async for item in stream:
print(item, flush=True)
data = item.data() data = item.data()
print(f"Item data: {data}") print(f"Item data: {data}")
await queue.put(data) await queue.put(data)
...@@ -115,13 +117,13 @@ class DynemoClient: ...@@ -115,13 +117,13 @@ class DynemoClient:
except Exception: except Exception:
raise raise
self._dynemo_clients[name] = get_stream self._dynamo_clients[name] = get_stream
return self._dynemo_clients[name] return self._dynamo_clients[name]
class DynemoDependency(Dependency[T]): class DynamoDependency(Dependency[T]):
"""Enhanced dependency that supports Dynemo endpoints""" """Enhanced dependency that supports Dynamo endpoints"""
def __init__( def __init__(
self, self,
...@@ -131,7 +133,7 @@ class DynemoDependency(Dependency[T]): ...@@ -131,7 +133,7 @@ class DynemoDependency(Dependency[T]):
cluster: str | None = None, cluster: str | None = None,
): ):
super().__init__(on, url=url, deployment=deployment, cluster=cluster) super().__init__(on, url=url, deployment=deployment, cluster=cluster)
self._dynemo_client: Optional[DynemoClient] = None self._dynamo_client: Optional[DynamoClient] = None
self._runtime = None self._runtime = None
# offers an escape hatch to get the endpoint directly # offers an escape hatch to get the endpoint directly
...@@ -143,7 +145,7 @@ class DynemoDependency(Dependency[T]): ...@@ -143,7 +145,7 @@ class DynemoDependency(Dependency[T]):
... ...
await dep.get_endpoint("generate") # equivalent to the following await dep.get_endpoint("generate") # equivalent to the following
router_client = ( router_client = (
await runtime.namespace("dynemo-init") await runtime.namespace("dynamo-init")
.component("router") .component("router")
.endpoint("generate") .endpoint("generate")
.client() .client()
...@@ -153,13 +155,13 @@ class DynemoDependency(Dependency[T]): ...@@ -153,13 +155,13 @@ class DynemoDependency(Dependency[T]):
# TODO: Read the runtime from the tdist since it is not stored in global # TODO: Read the runtime from the tdist since it is not stored in global
if self._runtime is None: if self._runtime is None:
print( print(
"Get Endpoint: Runtime not set for DynemoDependency. Cannot get endpoint." "Get Endpoint: Runtime not set for DynamoDependency. Cannot get endpoint."
) )
raise ValueError("Runtime not set for DynemoDependency") raise ValueError("Runtime not set for DynamoDependency")
address = self.on.dynemo_address() address = self.on.dynamo_address()
comp_ns, comp_name = address comp_ns, comp_name = address
print("Get Endpoint: Dynemo ADDRESS: ", address) print("Get Endpoint: Dynamo ADDRESS: ", address)
return ( return (
await self._runtime.namespace(comp_ns) await self._runtime.namespace(comp_ns)
.component(comp_name) .component(comp_name)
...@@ -168,19 +170,19 @@ class DynemoDependency(Dependency[T]): ...@@ -168,19 +170,19 @@ class DynemoDependency(Dependency[T]):
) )
def set_runtime(self, runtime: Any) -> None: def set_runtime(self, runtime: Any) -> None:
"""Set the Dynemo runtime for this dependency""" """Set the Dynamo runtime for this dependency"""
self._runtime = runtime self._runtime = runtime
if self._dynemo_client: if self._dynamo_client:
self._dynemo_client._runtime = runtime self._dynamo_client._runtime = runtime
def get(self, *args: Any, **kwargs: Any) -> T | Any: def get(self, *args: Any, **kwargs: Any) -> T | Any:
# If this is a Dynemo-enabled service, return the Dynemo client # If this is a Dynamo-enabled service, return the Dynamo client
if isinstance(self.on, CompoundService) and self.on.is_dynemo_component(): if isinstance(self.on, DynamoService) and self.on.is_dynamo_component():
if self._dynemo_client is None: if self._dynamo_client is None:
self._dynemo_client = DynemoClient(self.on) self._dynamo_client = DynamoClient(self.on)
if self._runtime: if self._runtime:
self._dynemo_client._runtime = self._runtime self._dynamo_client._runtime = self._runtime
return self._dynemo_client return self._dynamo_client
# Otherwise fall back to normal BentoML dependency resolution # Otherwise fall back to normal BentoML dependency resolution
return super().get(*args, **kwargs) return super().get(*args, **kwargs)
...@@ -192,11 +194,11 @@ def depends( ...@@ -192,11 +194,11 @@ def depends(
url: str | None = None, url: str | None = None,
deployment: str | None = None, deployment: str | None = None,
cluster: str | None = None, cluster: str | None = None,
) -> DynemoDependency[T]: ) -> DynamoDependency[T]:
"""Create a dependency that's Dynemo-aware. """Create a dependency that's Dynamo-aware.
If the dependency is on a Dynemo-enabled service, this will return a client If the dependency is on a Dynamo-enabled service, this will return a client
that can call Dynemo endpoints. Otherwise behaves like normal BentoML dependency. that can call Dynamo endpoints. Otherwise behaves like normal BentoML dependency.
Args: Args:
on: The service to depend on on: The service to depend on
...@@ -205,8 +207,8 @@ def depends( ...@@ -205,8 +207,8 @@ def depends(
cluster: Cluster name cluster: Cluster name
Raises: Raises:
AttributeError: When trying to call a non-existent Dynemo endpoint AttributeError: When trying to call a non-existent Dynamo endpoint
""" """
if on is not None and not isinstance(on, Service): if on is not None and not isinstance(on, Service):
raise TypeError("depends() expects a class decorated with @service()") raise TypeError("depends() expects a class decorated with @service()")
return DynemoDependency(on, url=url, deployment=deployment, cluster=cluster) return DynamoDependency(on, url=url, deployment=deployment, cluster=cluster)
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# wrapper over bento images to handle Dynemo base image # wrapper over bento images to handle Dynamo base image
import os import os
import bentoml import bentoml
# TODO: "dynemo:latest-vllm" image will not be available to image builder in k8s # TODO: "dynamo:latest-vllm" image will not be available to image builder in k8s
# so We'd consider publishing the base image for releases to public nvcr.io registry. # so We'd consider publishing the base image for releases to public nvcr.io registry.
image_name = os.getenv("DYNEMO_IMAGE", "dynemo:latest-vllm") image_name = os.getenv("DYNEMO_IMAGE", "dynamo:latest-vllm")
DYNEMO_IMAGE = bentoml.images.PythonImage(base_image=image_name) DYNEMO_IMAGE = bentoml.images.PythonImage(base_image=image_name)
...@@ -22,22 +22,22 @@ from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union ...@@ -22,22 +22,22 @@ from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union
from _bentoml_sdk import Service, ServiceConfig from _bentoml_sdk import Service, ServiceConfig
from _bentoml_sdk.images import Image from _bentoml_sdk.images import Image
from dynemo.sdk.lib.decorators import DynemoEndpoint from dynamo.sdk.lib.decorators import DynamoEndpoint
T = TypeVar("T", bound=object) T = TypeVar("T", bound=object)
@dataclass @dataclass
class DynemoConfig: class DynamoConfig:
"""Configuration for Dynemo components""" """Configuration for Dynamo components"""
enabled: bool = False enabled: bool = False
name: str | None = None name: str | None = None
namespace: str | None = None namespace: str | None = None
class CompoundService(Service[T]): class DynamoService(Service[T]):
"""A custom service class that extends BentoML's base Service with Dynemo capabilities""" """A custom service class that extends BentoML's base Service with Dynamo capabilities"""
def __init__( def __init__(
self, self,
...@@ -45,71 +45,71 @@ class CompoundService(Service[T]): ...@@ -45,71 +45,71 @@ class CompoundService(Service[T]):
inner: type[T], inner: type[T],
image: Optional[Image] = None, image: Optional[Image] = None,
envs: Optional[list[dict[str, Any]]] = None, envs: Optional[list[dict[str, Any]]] = None,
dynemo_config: Optional[DynemoConfig] = None, dynamo_config: Optional[DynamoConfig] = None,
): ):
super().__init__(config=config, inner=inner, image=image, envs=envs or []) super().__init__(config=config, inner=inner, image=image, envs=envs or [])
# Initialize Dynemo configuration # Initialize Dynamo configuration
self._dynemo_config = ( self._dynamo_config = (
dynemo_config dynamo_config
if dynemo_config if dynamo_config
else DynemoConfig(name=inner.__name__, namespace="default") else DynamoConfig(name=inner.__name__, namespace="default")
) )
if self._dynemo_config.name is None: if self._dynamo_config.name is None:
self._dynemo_config.name = inner.__name__ self._dynamo_config.name = inner.__name__
# Register Dynemo endpoints # Register Dynamo endpoints
self._dynemo_endpoints: Dict[str, DynemoEndpoint] = {} self._dynamo_endpoints: Dict[str, DynamoEndpoint] = {}
for field in dir(inner): for field in dir(inner):
value = getattr(inner, field) value = getattr(inner, field)
if isinstance(value, DynemoEndpoint): if isinstance(value, DynamoEndpoint):
self._dynemo_endpoints[value.name] = value self._dynamo_endpoints[value.name] = value
def is_dynemo_component(self) -> bool: def is_dynamo_component(self) -> bool:
"""Check if this service is configured as a Dynemo component""" """Check if this service is configured as a Dynamo component"""
return self._dynemo_config.enabled return self._dynamo_config.enabled
def dynamo_address(self) -> Tuple[Optional[str], Optional[str]]:
    """Get the Dynamo address for this component as ``(namespace, name)``.

    Resolution order:
      1. If the ``BENTOML_RUNNER_MAP`` environment variable holds a JSON
         object whose entry for ``self.name`` is a ``dynamo://namespace/name``
         string, that address is parsed and returned.
      2. Otherwise the namespace and name from the service's Dynamo
         configuration are returned.

    Returns:
        A ``(namespace, name)`` tuple.

    Raises:
        ValueError: If the service is not enabled as a Dynamo component, if
            ``BENTOML_RUNNER_MAP`` is not valid JSON, or if a ``dynamo://``
            address lacks the ``namespace/name`` separator.
    """
    if not self.is_dynamo_component():
        raise ValueError("Service is not configured as a Dynamo component")

    # Check if we have a runner map with Dynamo address
    runner_map = os.environ.get("BENTOML_RUNNER_MAP")
    if runner_map:
        try:
            runners = json.loads(runner_map)
            # Valid JSON need not be an object, and an entry need not be a
            # string; treat those cases as "no Dynamo override" instead of
            # letting TypeError/AttributeError escape unwrapped.
            address = runners.get(self.name) if isinstance(runners, dict) else None
            if isinstance(address, str) and address.startswith("dynamo://"):
                # Parse dynamo://namespace/name into (namespace, name)
                _, path = address.split("://", 1)
                # Unpacking raises ValueError when "/" is missing; it is
                # re-raised below with the runner-map context attached.
                namespace, name = path.split("/", 1)
                print(
                    f"Resolved Dynamo address from runner map: {namespace}/{name}"
                )
                return (namespace, name)
        except (json.JSONDecodeError, ValueError) as e:
            raise ValueError(f"Failed to parse BENTOML_RUNNER_MAP: {str(e)}") from e

    print(
        f"Using default Dynamo address: {self._dynamo_config.namespace}/{self._dynamo_config.name}"
    )
    return (self._dynamo_config.namespace, self._dynamo_config.name)
def get_dynamo_endpoints(self) -> Dict[str, DynamoEndpoint]:
    """Get all registered Dynamo endpoints.

    Returns:
        The service's endpoint registry, keyed by endpoint name. This is the
        registry object itself, not a copy.
    """
    return self._dynamo_endpoints
def get_dynamo_endpoint(self, name: str) -> DynamoEndpoint:
    """Get a specific Dynamo endpoint by name.

    Args:
        name: Name under which the endpoint was registered.

    Returns:
        The endpoint registered under ``name``.

    Raises:
        ValueError: If no endpoint is registered under ``name``.
    """
    try:
        return self._dynamo_endpoints[name]
    except KeyError:
        # Callers expect ValueError; the KeyError adds nothing, so drop it
        # from the traceback.
        raise ValueError(f"No Dynamo endpoint found with name: {name}") from None
def list_dynamo_endpoints(self) -> List[str]:
    """List names of all registered Dynamo endpoints.

    Returns:
        A new list of endpoint names, in the registry's iteration order.
    """
    return list(self._dynamo_endpoints)
# todo: add another function to bind an instance of the inner to the self within these methods # todo: add another function to bind an instance of the inner to the self within these methods
...@@ -120,13 +120,13 @@ def service( ...@@ -120,13 +120,13 @@ def service(
*, *,
image: Optional[Image] = None, image: Optional[Image] = None,
envs: Optional[list[dict[str, Any]]] = None, envs: Optional[list[dict[str, Any]]] = None,
dynemo: Optional[Union[Dict[str, Any], DynemoConfig]] = None, dynamo: Optional[Union[Dict[str, Any], DynamoConfig]] = None,
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
"""Enhanced service decorator that supports Dynemo configuration """Enhanced service decorator that supports Dynamo configuration
Args: Args:
dynemo: Dynemo configuration, either as a DynemoConfig object or dict with keys: dynamo: Dynamo configuration, either as a DynamoConfig object or dict with keys:
- enabled: bool (default True) - enabled: bool (default True)
- name: str (default: class name) - name: str (default: class name)
- namespace: str (default: "default") - namespace: str (default: "default")
...@@ -134,23 +134,23 @@ def service( ...@@ -134,23 +134,23 @@ def service(
""" """
config = kwargs config = kwargs
# Parse dict into DynemoConfig object # Parse dict into DynamoConfig object
dynemo_config: Optional[DynemoConfig] = None dynamo_config: Optional[DynamoConfig] = None
if dynemo is not None: if dynamo is not None:
if isinstance(dynemo, dict): if isinstance(dynamo, dict):
dynemo_config = DynemoConfig(**dynemo) dynamo_config = DynamoConfig(**dynamo)
else: else:
dynemo_config = dynemo dynamo_config = dynamo
def decorator(inner: type[T]) -> CompoundService[T]: def decorator(inner: type[T]) -> DynamoService[T]:
if isinstance(inner, Service): if isinstance(inner, Service):
raise TypeError("service() decorator can only be applied once") raise TypeError("service() decorator can only be applied once")
return CompoundService( return DynamoService(
config=config, config=config,
inner=inner, inner=inner,
image=image, image=image,
envs=envs or [], envs=envs or [],
dynemo_config=dynemo_config, dynamo_config=dynamo_config,
) )
return decorator(inner) if inner is not None else decorator return decorator(inner) if inner is not None else decorator
...@@ -316,7 +316,7 @@ wheels = [ ...@@ -316,7 +316,7 @@ wheels = [
] ]
[[package]] [[package]]
name = "dynemo-sdk" name = "dynamo-sdk"
version = "0.1.0" version = "0.1.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Rebuild/reload settings for local development of the Go API server.
# NOTE(review): the key layout looks like an Air (.air.toml) config — confirm
# against the tool actually used before relying on individual key semantics.
root = "."
testdata_dir = "testdata"
tmp_dir = "tmp"
[build]
args_bin = []
# Same path as the `-o` output of `cmd` below.
bin = "./tmp/main"
# Compiles the ./api package into ./tmp/main.
cmd = "go build -o ./tmp/main ./api"
delay = 1000
exclude_dir = ["assets", "tmp", "vendor", "testdata"]
exclude_file = []
# Pattern targets Go test files (names containing "_test.go").
exclude_regex = ["_test.go"]
exclude_unchanged = false
follow_symlink = false
full_bin = ""
include_dir = []
include_ext = ["go", "tpl", "tmpl", "html"]
include_file = []
kill_delay = "0s"
log = "build-errors.log"
poll = false
poll_interval = 0
post_cmd = []
pre_cmd = []
rerun = false
rerun_delay = 500
send_interrupt = false
stop_on_error = false
[color]
app = ""
build = "yellow"
main = "magenta"
runner = "green"
watcher = "cyan"
[log]
main_only = false
time = false
[misc]
clean_on_exit = false
# Proxy is switched off; the zero ports below are placeholders.
[proxy]
app_port = 0
enabled = false
proxy_port = 0
[screen]
clear_on_rebuild = false
keep_scroll = true
# Local development env
DB_USER="postgres"
DB_PASSWORD="pgadmin"
DB_HOST="localhost"
DB_PORT=5432
DB_NAME="postgres"
DMS_HOST="localhost"
DMS_PORT=8080
NDS_HOST="localhost"
NDS_PORT=8001
DEFAULT_KUBE_NAMESPACE="compoundai"
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
bin/*
Dockerfile.cross
# Test binary, built with `go test -c`
*.test
# Temporary folder used by Air
tmp
\ No newline at end of file
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Stage 1 (builder): compile the API server binary from api/main.go.
FROM golang:1.23 AS builder
# Target platform arguments consumed by the `go build` step below.
ARG TARGETOS
ARG TARGETARCH
WORKDIR /workspace
# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# Cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer.
RUN go mod download
# Copy the Go source
COPY api/ api/
# NOTE(review): .env is copied here and again into the final image below.
# Confirm it never carries real credentials, since it ships inside the image.
COPY .env .env
# Build
# GOOS falls back to linux when TARGETOS is unset. GOARCH has no default value, so the binary is
# built for the platform of the host where the command was called. For example, if we call
# `make docker-build` in a local env on Apple Silicon (M1), the docker BUILDPLATFORM arg will be
# linux/arm64, whereas on Apple x86 it will be linux/amd64. Therefore, by leaving it empty we can
# ensure that the container and the binary shipped in it have the same platform.
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o server api/main.go
# Stage 2: use distroless as a minimal base image to package the server binary.
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
WORKDIR /
COPY --from=builder /workspace/server .
COPY --from=builder /workspace/.env .
# Run as a fixed non-root UID:GID (presumably the distroless "nonroot" user — TODO confirm).
USER 65532:65532
ENTRYPOINT ["/server"]
VERSION 0.8
# build: compile the API server from api/main.go and export the binary and .env as artifacts.
build:
FROM golang:1.23
ARG TARGETOS
ARG TARGETARCH
WORKDIR /workspace
# Copy the module manifests and download deps before the source so the
# download step is not re-run on source-only changes.
COPY go.mod go.mod
COPY go.sum go.sum
RUN go mod download
COPY api/ api/
COPY .env .env
# GOOS falls back to linux when TARGETOS is unset; GOARCH has no fallback.
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o server api/main.go
SAVE ARTIFACT /workspace/server
SAVE ARTIFACT /workspace/.env
# TODO(mkhadkevich): earthly tests fail, so the test target stays disabled. See
# https://gitlab-master.nvidia.com/aire/microservices/compoundai/-/jobs/144475821
#test:
# FROM +build
# # copy test files
# COPY tests/ tests/
# RUN go test ./...
# docker: package the +build artifacts into a distroless image and push it as
# $CI_REGISTRY_IMAGE/$IMAGE:$CI_COMMIT_SHA (the ARG values below are the defaults).
docker:
ARG CI_REGISTRY_IMAGE=my-registry
ARG CI_COMMIT_SHA=latest
ARG IMAGE=compound-api-server
FROM gcr.io/distroless/static:nonroot
WORKDIR /
COPY +build/server .
# NOTE(review): .env ships inside the pushed image — confirm it holds no real secrets.
COPY +build/.env .
USER 65532:65532
ENTRYPOINT ["/server"]
SAVE IMAGE --push $CI_REGISTRY_IMAGE/$IMAGE:$CI_COMMIT_SHA
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment