"examples/multimodal_v1/components/direct_processor.py" did not exist on "861c50982b702abbece18ad4cdc46dbe9a10cbd6"
Unverified Commit 33e72720 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

feat: support k8s target in dynamo deploy command (#1104)

parent 31ff2370
## Provision S3-compatible cloud object storage:
The Dynamo API Server requires an S3-compatible object store to store Dynamo NIMs.
The Dynamo API Server requires an S3-compatible object store to store Dynamo Components.
## Provision PostgreSQL Database
The Dynamo API Server requires a PostgreSQL database to store data entity and version metadata.
......@@ -23,4 +23,4 @@ ai-dynamo-store
uv pip install -e ".[dev]"
#### Run docker container locally
earthly +docker && docker run -it my-registry/ai-dynamo-store:latest
\ No newline at end of file
earthly +docker && docker run -it my-registry/ai-dynamo-store:latest
......@@ -39,7 +39,7 @@ class TimeCreatedUpdated(SQLModel):
)
class DynamoNimUploadStatus(str, Enum):
class DynamoComponentUploadStatus(str, Enum):
Pending = "pending"
Uploading = "uploading"
Success = "success"
......@@ -62,22 +62,22 @@ class TransmissionStrategy(str, Enum):
"""
class CreateDynamoNimRequest(BaseModel):
class CreateDynamoComponentRequest(BaseModel):
name: str
description: str
labels: Optional[Dict[str, str]] = None
class CreateDynamoNimVersionRequest(BaseModel):
class CreateDynamoComponentVersionRequest(BaseModel):
description: str
version: str
manifest: DynamoNimVersionManifestSchema
manifest: DynamoComponentVersionManifestSchema
build_at: datetime
labels: Optional[list[Dict[str, str]]] = None
class UpdateDynamoNimVersionRequest(BaseModel):
manifest: DynamoNimVersionManifestSchema
class UpdateDynamoComponentVersionRequest(BaseModel):
manifest: DynamoComponentVersionManifestSchema
labels: Optional[list[Dict[str, str]]] = None
......@@ -113,8 +113,8 @@ class ListQuerySchema(BaseModel):
class ResourceType(str, Enum):
Organization = "organization"
Cluster = "cluster"
DynamoNim = "dynamo_nim"
DynamoNimVersion = "dynamo_nim_version"
DynamoComponent = "dynamo_component"
DynamoComponentVersion = "dynamo_component_version"
Deployment = "deployment"
DeploymentRevision = "deployment_revision"
TerminalRecord = "terminal_record"
......@@ -156,29 +156,29 @@ class UserSchema(BaseModel):
last_name: str
class DynamoNimVersionApiSchema(BaseModel):
class DynamoComponentVersionApiSchema(BaseModel):
route: str
doc: str
input: str
output: str
class DynamoNimVersionManifestSchema(BaseModel):
class DynamoComponentVersionManifestSchema(BaseModel):
service: str
bentoml_version: str
apis: Dict[str, DynamoNimVersionApiSchema]
bentoml_version: Optional[str] = None
apis: Dict[str, DynamoComponentVersionApiSchema]
size_bytes: int
def _validate_manifest(v):
try:
# Validate that the 'manifest' matches the version manifest schema
return DynamoNimVersionManifestSchema.model_validate(v).model_dump()
return DynamoComponentVersionManifestSchema.model_validate(v).model_dump()
except ValidationError as e:
raise ValueError(f"Invalid manifest schema: {e}")
class DynamoNimVersionSchema(ResourceSchema):
class DynamoComponentVersionSchema(ResourceSchema):
bento_repository_uid: str
version: str
description: str
......@@ -192,7 +192,7 @@ class DynamoNimVersionSchema(ResourceSchema):
presigned_urls_deprecated: bool = False
transmission_strategy: TransmissionStrategy
upload_id: str = ""
manifest: Optional[Union[DynamoNimVersionManifestSchema, Dict[str, Any]]]
manifest: Optional[Union[DynamoComponentVersionManifestSchema, Dict[str, Any]]]
build_at: datetime
@field_validator("manifest")
......@@ -200,31 +200,31 @@ class DynamoNimVersionSchema(ResourceSchema):
return _validate_manifest(v)
class DynamoNimVersionFullSchema(DynamoNimVersionSchema):
repository: DynamoNimSchema
class DynamoComponentVersionFullSchema(DynamoComponentVersionSchema):
repository: DynamoComponentSchema
class DynamoNimSchema(ResourceSchema):
latest_bento: Optional[DynamoNimVersionSchema]
latest_bentos: Optional[List[DynamoNimVersionSchema]]
class DynamoComponentSchema(ResourceSchema):
latest_bento: Optional[DynamoComponentVersionSchema]
latest_bentos: Optional[List[DynamoComponentVersionSchema]]
n_bentos: int
description: str
class DynamoNimSchemaWithDeploymentsSchema(DynamoNimSchema):
class DynamoComponentSchemaWithDeploymentsSchema(DynamoComponentSchema):
deployments: List[str] = [] # mocked for now
class DynamoNimSchemaWithDeploymentsListSchema(BaseListSchema):
items: List[DynamoNimSchemaWithDeploymentsSchema]
class DynamoComponentSchemaWithDeploymentsListSchema(BaseListSchema):
items: List[DynamoComponentSchemaWithDeploymentsSchema]
class DynamoNimVersionsWithNimListSchema(BaseListSchema):
items: List[DynamoNimVersionWithNimSchema]
class DynamoComponentVersionsWithNimListSchema(BaseListSchema):
items: List[DynamoComponentVersionWithNimSchema]
class DynamoNimVersionWithNimSchema(DynamoNimVersionSchema):
repository: DynamoNimSchema
class DynamoComponentVersionWithNimSchema(DynamoComponentVersionSchema):
repository: DynamoComponentSchema
"""
......@@ -232,16 +232,16 @@ class DynamoNimVersionWithNimSchema(DynamoNimVersionSchema):
"""
class BaseDynamoNimModel(TimeCreatedUpdated, AsyncAttrs):
class BaseDynamoComponentModel(TimeCreatedUpdated, AsyncAttrs):
deleted_at: Optional[datetime] = SQLField(nullable=True, default=None)
class DynamoNimVersionBase(BaseDynamoNimModel):
class DynamoComponentVersionBase(BaseDynamoComponentModel):
version: str = SQLField(default=None)
description: str = SQLField(default="")
file_path: Optional[str] = SQLField(default=None)
file_oid: Optional[str] = SQLField(default=None) # Used for GIT Lfs access
upload_status: DynamoNimUploadStatus = SQLField()
upload_status: DynamoComponentUploadStatus = SQLField()
image_build_status: ImageBuildStatus = SQLField()
image_build_status_syncing_at: Optional[datetime] = SQLField(default=None)
image_build_status_updated_at: Optional[datetime] = SQLField(default=None)
......@@ -249,7 +249,7 @@ class DynamoNimVersionBase(BaseDynamoNimModel):
upload_finished_at: Optional[datetime] = SQLField(default=None)
upload_finished_reason: str = SQLField(default="")
manifest: Optional[
Union[DynamoNimVersionManifestSchema, Dict[str, Any]]
Union[DynamoComponentVersionManifestSchema, Dict[str, Any]]
] = SQLField(
default=None, sa_column=Column(JSON)
) # JSON-like field for the manifest
......@@ -260,6 +260,6 @@ class DynamoNimVersionBase(BaseDynamoNimModel):
return _validate_manifest(v)
class DynamoNimBase(BaseDynamoNimModel):
class DynamoComponentBase(BaseDynamoComponentModel):
name: str = SQLField(default="", unique=True)
description: str = SQLField(default="")
......@@ -40,13 +40,13 @@ from .utils import build_latest_revision_from_cr, get_deployment_status, get_url
router = APIRouter(prefix="/api/v2/deployments", tags=["deployments"])
def sanitize_deployment_name(name: Optional[str], dynamo_nim: str) -> str:
def sanitize_deployment_name(name: Optional[str], dynamo_component: str) -> str:
"""
Resolve a name for the DynamoGraphDeployment that will work safely in k8s
Args:
name: Optional custom name
dynamo_nim: Bento name and version (format: name:version)
dynamo_component: Component name and version (format: name:version)
Returns:
A unique deployment name that is at most 63 characters
......@@ -55,11 +55,11 @@ def sanitize_deployment_name(name: Optional[str], dynamo_nim: str) -> str:
# If name is provided, truncate it to 63
base_name = name[:63]
else:
# Generate base name from dynamoNim
dynamo_nim_parts = dynamo_nim.split(":")
if len(dynamo_nim_parts) != 2:
raise ValueError("Invalid dynamoNim format, expected 'name:version'")
base_name = f"dep-{dynamo_nim_parts[0]}-{dynamo_nim_parts[1]}"
# Generate base name from dynamo_component
dynamo_component_parts = dynamo_component.split(":")
if len(dynamo_component_parts) != 2:
raise ValueError("Invalid dynamo_component format, expected 'name:version'")
base_name = f"dep-{dynamo_component_parts[0]}-{dynamo_component_parts[1]}"
# Truncate to 63 chars
base_name = base_name[:63]
......@@ -91,7 +91,7 @@ async def create_deployment(deployment: CreateDeploymentSchema):
created_crd = create_dynamo_deployment(
name=deployment_name,
namespace=kube_namespace,
dynamo_nim=deployment.bento,
dynamo_component=deployment.bento or deployment.component,
labels={
"ngc-organization": ownership["organization_id"],
"ngc-user": ownership["user_id"],
......
......@@ -26,33 +26,33 @@ from sqlmodel import col, desc, func, select
from sqlmodel.ext.asyncio.session import AsyncSession
from .components import (
CreateDynamoNimRequest,
CreateDynamoNimVersionRequest,
DynamoNimSchema,
DynamoNimSchemaWithDeploymentsListSchema,
DynamoNimSchemaWithDeploymentsSchema,
DynamoNimUploadStatus,
DynamoNimVersionFullSchema,
DynamoNimVersionSchema,
DynamoNimVersionsWithNimListSchema,
DynamoNimVersionWithNimSchema,
CreateDynamoComponentRequest,
CreateDynamoComponentVersionRequest,
DynamoComponentSchema,
DynamoComponentSchemaWithDeploymentsListSchema,
DynamoComponentSchemaWithDeploymentsSchema,
DynamoComponentUploadStatus,
DynamoComponentVersionFullSchema,
DynamoComponentVersionSchema,
DynamoComponentVersionsWithNimListSchema,
DynamoComponentVersionWithNimSchema,
ImageBuildStatus,
ListQuerySchema,
OrganizationSchema,
ResourceType,
TransmissionStrategy,
UpdateDynamoNimVersionRequest,
UpdateDynamoComponentVersionRequest,
UserSchema,
)
from .model import DynamoNim, DynamoNimVersion, make_aware, utc_now_naive
from .model import DynamoComponent, DynamoComponentVersion, make_aware, utc_now_naive
from .storage import S3Storage, get_s3_storage, get_session
API_TAG_MODELS = "dynamo"
DEFAULT_LIMIT = 3
SORTABLE_COLUMNS = {
"created_at": col(DynamoNim.created_at),
"update_at": col(DynamoNim.updated_at),
"created_at": col(DynamoComponent.created_at),
"update_at": col(DynamoComponent.updated_at),
}
router = APIRouter(prefix="/api/v1")
......@@ -101,26 +101,28 @@ async def current_org(
)
# GetDynamoNim is a FastAPI dependency that will perform stored model lookup.
async def dynamo_nim_handler(
# GetDynamoComponent is a FastAPI dependency that will perform stored model lookup.
async def dynamo_component_handler(
*,
session: AsyncSession = Depends(get_session),
dynamo_nim_name: str,
) -> DynamoNim:
statement = select(DynamoNim).where(DynamoNim.name == dynamo_nim_name)
stored_dynamo_nim_result = await session.exec(statement)
stored_dynamo_nim = stored_dynamo_nim_result.first()
if not stored_dynamo_nim:
dynamo_component_name: str,
) -> DynamoComponent:
statement = select(DynamoComponent).where(
DynamoComponent.name == dynamo_component_name
)
stored_dynamo_component_result = await session.exec(statement)
stored_dynamo_component = stored_dynamo_component_result.first()
if not stored_dynamo_component:
raise HTTPException(status_code=404, detail="Record not found")
return stored_dynamo_nim
return stored_dynamo_component
GetDynamoNim = Depends(dynamo_nim_handler)
GetDynamoComponent = Depends(dynamo_component_handler)
@router.get(
"/bento_repositories/{dynamo_nim_name}",
"/bento_repositories/{dynamo_component_name}",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -128,48 +130,50 @@ GetDynamoNim = Depends(dynamo_nim_handler)
tags=[API_TAG_MODELS],
)
@router.get(
"/dynamo_nims/{dynamo_nim_name}",
"/dynamo_components/{dynamo_component_name}",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def get_dynamo_nim(
async def get_dynamo_component(
*,
dynamo_nim: DynamoNim = GetDynamoNim,
dynamo_component: DynamoComponent = GetDynamoComponent,
session: AsyncSession = Depends(get_session),
):
dynamo_nim_id = dynamo_nim.id
dynamo_component_id = dynamo_component.id
statement = (
select(DynamoNimVersion)
select(DynamoComponentVersion)
.where(
DynamoNimVersion.dynamo_nim_id == dynamo_nim_id,
DynamoComponentVersion.dynamo_component_id == dynamo_component_id,
)
.order_by(desc(DynamoNimVersion.created_at))
.order_by(desc(DynamoComponentVersion.created_at))
)
result = await session.exec(statement)
dynamo_nims = result.all()
dynamo_components = result.all()
latest_dynamo_nim_versions = await convert_dynamo_nim_version_model_to_schema(
session, list(dynamo_nims), dynamo_nim
latest_dynamo_component_versions = (
await convert_dynamo_component_version_model_to_schema(
session, list(dynamo_components), dynamo_component
)
)
return DynamoNimSchema(
uid=dynamo_nim.id,
created_at=dynamo_nim.created_at,
updated_at=dynamo_nim.updated_at,
deleted_at=dynamo_nim.deleted_at,
name=dynamo_nim.name,
resource_type=ResourceType.DynamoNim,
return DynamoComponentSchema(
uid=dynamo_component.id,
created_at=dynamo_component.created_at,
updated_at=dynamo_component.updated_at,
deleted_at=dynamo_component.deleted_at,
name=dynamo_component.name,
resource_type=ResourceType.DynamoComponent,
labels=[],
description=dynamo_nim.description,
description=dynamo_component.description,
latest_bento=None
if not latest_dynamo_nim_versions
else latest_dynamo_nim_versions[0],
latest_bentos=latest_dynamo_nim_versions,
n_bentos=len(dynamo_nims),
if not latest_dynamo_component_versions
else latest_dynamo_component_versions[0],
latest_bentos=latest_dynamo_component_versions,
n_bentos=len(dynamo_components),
)
......@@ -182,41 +186,41 @@ async def get_dynamo_nim(
tags=[API_TAG_MODELS],
)
@router.post(
"/dynamo_nims",
"/dynamo_components",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def create_dynamo_nim(
async def create_dynamo_component(
*,
session: AsyncSession = Depends(get_session),
request: CreateDynamoNimRequest,
request: CreateDynamoComponentRequest,
):
"""
Create a new repository
"""
try:
db_dynamo_nim = DynamoNim.model_validate(request)
db_dynamo_component = DynamoComponent.model_validate(request)
except ValidationError as e:
raise HTTPException(status_code=422, detail=json.loads(e.json())) # type: ignore
logger.debug("Creating repository...")
try:
session.add(db_dynamo_nim)
session.add(db_dynamo_component)
await session.flush()
await session.refresh(db_dynamo_nim)
await session.refresh(db_dynamo_component)
except IntegrityError as e:
logger.error(f"Details: {str(e)}")
await session.rollback()
logger.error(
f"The requested Dynamo NIM {db_dynamo_nim.name} already exists in the database"
f"The requested Dynamo Component {db_dynamo_component.name} already exists in the database"
)
raise HTTPException(
status_code=422,
detail=f"The Dynamo NIM {db_dynamo_nim.name} already exists in the database",
detail=f"The Dynamo Component {db_dynamo_component.name} already exists in the database",
) # type: ignore
except SQLAlchemyError as e:
logger.error("Something went wrong with adding the repository")
......@@ -224,18 +228,18 @@ async def create_dynamo_nim(
await session.commit()
logger.debug(
f"Dynamo NIM {db_dynamo_nim.id} with name {db_dynamo_nim.name} saved to database"
f"Dynamo Component {db_dynamo_component.id} with name {db_dynamo_component.name} saved to database"
)
return DynamoNimSchema(
uid=db_dynamo_nim.id,
created_at=db_dynamo_nim.created_at,
updated_at=db_dynamo_nim.updated_at,
deleted_at=db_dynamo_nim.deleted_at,
name=db_dynamo_nim.name,
resource_type=ResourceType.DynamoNim,
return DynamoComponentSchema(
uid=db_dynamo_component.id,
created_at=db_dynamo_component.created_at,
updated_at=db_dynamo_component.updated_at,
deleted_at=db_dynamo_component.deleted_at,
name=db_dynamo_component.name,
resource_type=ResourceType.DynamoComponent,
labels=[],
description=db_dynamo_nim.description,
description=db_dynamo_component.description,
latest_bentos=None,
latest_bento=None,
n_bentos=0,
......@@ -251,102 +255,127 @@ async def create_dynamo_nim(
tags=[API_TAG_MODELS],
)
@router.get(
"/dynamo_nims",
"/dynamo_components",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def get_dynamo_nim_list(
async def get_dynamo_component_list(
*,
session: AsyncSession = Depends(get_session),
query_params: ListQuerySchema = Depends(),
):
try:
# Base query using SQLModel's select
statement = select(DynamoNim)
statement = select(DynamoComponent)
# Handle search query 'q'
if query_params.q:
statement = statement.where(DynamoNim.name.ilike(f"%{query_params.q}%"))
statement = statement.where(
DynamoComponent.name.ilike(f"%{query_params.q}%")
)
# Get total count using SQLModel
total_statement = select(func.count(DynamoNim.id)).select_from(statement)
total_statement = select(func.count(DynamoComponent.id)).select_from(statement)
# Execute count query
result = await session.exec(total_statement)
total = result.scalar() or 0
total = result.first() or 0
# Apply pagination and sorting
if query_params.sort_asc is not None:
statement = statement.order_by(
DynamoNim.created_at.asc()
DynamoComponent.created_at.asc()
if query_params.sort_asc
else DynamoNim.created_at.desc()
else DynamoComponent.created_at.desc()
)
statement = statement.offset(query_params.start).limit(query_params.count)
# Execute main query
result = await session.exec(statement)
dynamo_nims = result.scalars().all()
dynamo_components = result.all()
# Convert the stored models to API schemas
dynamo_nim_schemas = await convert_dynamo_nim_model_to_schema(
session, dynamo_nims
dynamo_component_schemas = await convert_dynamo_component_model_to_schema(
session, dynamo_components
)
dynamo_nims_with_deployments = [
DynamoNimSchemaWithDeploymentsSchema(
**dynamo_nim_schema.model_dump(), deployments=[]
dynamo_components_with_deployments = [
DynamoComponentSchemaWithDeploymentsSchema(
**dynamo_component_schema.model_dump(), deployments=[]
)
for dynamo_nim_schema in dynamo_nim_schemas
for dynamo_component_schema in dynamo_component_schemas
]
return DynamoNimSchemaWithDeploymentsListSchema(
return DynamoComponentSchemaWithDeploymentsListSchema(
total=total,
start=query_params.start,
count=query_params.count,
items=dynamo_nims_with_deployments,
items=dynamo_components_with_deployments,
)
except ValidationError as e:
raise HTTPException(status_code=422, detail=json.loads(e.json()))
async def dynamo_nim_version_handler(
async def dynamo_component_version_handler(
*,
session: AsyncSession = Depends(get_session),
dynamo_nim_name: str,
dynamo_component_name: str,
version: str,
) -> tuple[DynamoNimVersion, DynamoNim]:
statement = select(DynamoNimVersion, DynamoNim).where(
DynamoNimVersion.dynamo_nim_id == DynamoNim.id,
DynamoNimVersion.version == version,
DynamoNim.name == dynamo_nim_name,
) -> tuple[DynamoComponentVersion, DynamoComponent]:
# First check if the component exists
component_statement = select(DynamoComponent).where(
DynamoComponent.name == dynamo_component_name
)
component_result = await session.exec(component_statement)
component = component_result.first()
if not component:
logger.error(f"Dynamo Component '{dynamo_component_name}' not found")
raise HTTPException(
status_code=404,
detail=f"Dynamo Component '{dynamo_component_name}' not found",
)
# Then check for the specific version
statement = select(DynamoComponentVersion, DynamoComponent).where(
DynamoComponentVersion.dynamo_component_id == DynamoComponent.id,
DynamoComponentVersion.version == version,
DynamoComponent.name == dynamo_component_name,
)
result = await session.exec(statement)
records = result.all()
if not records:
logger.error("No Dynamo NIM version record found")
raise HTTPException(status_code=404, detail="Record not found")
logger.error(
f"No version '{version}' found for Dynamo Component '{dynamo_component_name}'"
)
raise HTTPException(
status_code=404,
detail=f"Version '{version}' not found for Dynamo Component '{dynamo_component_name}'",
)
if len(records) >= 2:
logger.error("Found multiple relations for Dynamo NIM version")
logger.error(
f"Found multiple relations for Dynamo Component version '{version}' of '{dynamo_component_name}'"
)
raise HTTPException(
status_code=422, detail="Found multiple relations for Dynamo NIM version"
status_code=422,
detail=f"Found multiple relations for Dynamo Component version '{version}' of '{dynamo_component_name}'",
)
return records[0]
GetDynamoNimVersion = Depends(dynamo_nim_version_handler)
GetDynamoComponentVersion = Depends(dynamo_component_version_handler)
@router.get(
"/bento_repositories/{dynamo_nim_name}/bentos/{version}",
"/bento_repositories/{dynamo_component_name}/bentos/{version}",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -354,33 +383,39 @@ GetDynamoNimVersion = Depends(dynamo_nim_version_handler)
tags=[API_TAG_MODELS],
)
@router.get(
"/dynamo_nims/{dynamo_nim_name}/versions/{version}",
"/dynamo_components/{dynamo_component_name}/versions/{version}",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def get_dynamo_nim_version(
async def get_dynamo_component_version(
*,
dynamo_nim_entities: tuple[DynamoNimVersion, DynamoNim] = GetDynamoNimVersion,
dynamo_component_entities: tuple[
DynamoComponentVersion, DynamoComponent
] = GetDynamoComponentVersion,
session: AsyncSession = Depends(get_session),
):
dynamo_nim_version, dynamo_nim = dynamo_nim_entities
dynamo_nim_version_schemas = await convert_dynamo_nim_version_model_to_schema(
session, [dynamo_nim_version], dynamo_nim
dynamo_component_version, dynamo_component = dynamo_component_entities
dynamo_component_version_schemas = (
await convert_dynamo_component_version_model_to_schema(
session, [dynamo_component_version], dynamo_component
)
)
dynamo_component_schemas = await convert_dynamo_component_model_to_schema(
session, [dynamo_component]
)
dynamo_nim_schemas = await convert_dynamo_nim_model_to_schema(session, [dynamo_nim])
full_schema = DynamoNimVersionFullSchema(
**dynamo_nim_version_schemas[0].model_dump(),
repository=dynamo_nim_schemas[0],
full_schema = DynamoComponentVersionFullSchema(
**dynamo_component_version_schemas[0].model_dump(),
repository=dynamo_component_schemas[0],
)
return full_schema
@router.post(
"/bento_repositories/{dynamo_nim_name}/bentos",
"/bento_repositories/{dynamo_component_name}/bentos",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -388,16 +423,16 @@ async def get_dynamo_nim_version(
tags=[API_TAG_MODELS],
)
@router.post(
"/dynamo_nims/{dynamo_nim_name}/versions",
"/dynamo_components/{dynamo_component_name}/versions",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def create_dynamo_nim_version(
request: CreateDynamoNimVersionRequest,
dynamo_nim: DynamoNim = GetDynamoNim,
async def create_dynamo_component_version(
request: CreateDynamoComponentVersionRequest,
dynamo_component: DynamoComponent = GetDynamoComponent,
session: AsyncSession = Depends(get_session),
):
"""
......@@ -406,49 +441,49 @@ async def create_dynamo_nim_version(
print("[DEBUG]request", request)
try:
# Create without validation
db_dynamo_nim_version = DynamoNimVersion(
db_dynamo_component_version = DynamoComponentVersion(
**request.model_dump(),
dynamo_nim_id=dynamo_nim.id,
upload_status=DynamoNimUploadStatus.Pending,
dynamo_component_id=dynamo_component.id,
upload_status=DynamoComponentUploadStatus.Pending,
image_build_status=ImageBuildStatus.Pending,
)
DynamoNimVersion.model_validate(db_dynamo_nim_version)
tag = f"{dynamo_nim.name}:{db_dynamo_nim_version.version}"
DynamoComponentVersion.model_validate(db_dynamo_component_version)
tag = f"{dynamo_component.name}:{db_dynamo_component_version.version}"
except ValidationError as e:
raise HTTPException(status_code=422, detail=json.loads(e.json())) # type: ignore
except BaseException as e:
raise HTTPException(status_code=422, detail=json.loads(e.json())) # type: ignore
try:
session.add(db_dynamo_nim_version)
session.add(db_dynamo_component_version)
await session.flush()
await session.refresh(db_dynamo_nim_version)
await session.refresh(db_dynamo_component_version)
except IntegrityError as e:
logger.error(f"Details: {str(e)}")
await session.rollback()
logger.error(f"The Dynamo NIM {tag} already exists")
logger.error(f"The Dynamo Component {tag} already exists")
raise HTTPException(
status_code=422,
detail=f"The Dynamo NIM version {tag} already exists",
detail=f"The Dynamo Component version {tag} already exists",
) # type: ignore
except SQLAlchemyError as e:
logger.error("Something went wrong with adding the Dynamo NIM")
logger.error("Something went wrong with adding the Dynamo Component")
raise HTTPException(status_code=500, detail=str(e))
logger.debug(
f"Commiting {dynamo_nim.name}:{db_dynamo_nim_version.version} to database"
f"Commiting {dynamo_component.name}:{db_dynamo_component_version.version} to database"
)
await session.commit()
schema = await convert_dynamo_nim_version_model_to_schema(
session, [db_dynamo_nim_version]
schema = await convert_dynamo_component_version_model_to_schema(
session, [db_dynamo_component_version]
)
return schema[0]
@router.get(
"/bento_repositories/{dynamo_nim_name}/bentos",
"/bento_repositories/{dynamo_component_name}/bentos",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -456,56 +491,60 @@ async def create_dynamo_nim_version(
tags=[API_TAG_MODELS],
)
@router.get(
"/dynamo_nims/{dynamo_nim_name}/versions",
"/dynamo_components/{dynamo_component_name}/versions",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def get_dynamo_nim_versions(
async def get_dynamo_component_versions(
*,
dynamo_nim: DynamoNim = GetDynamoNim,
dynamo_component: DynamoComponent = GetDynamoComponent,
session: AsyncSession = Depends(get_session),
query_params: ListQuerySchema = Depends(),
):
dynamo_nim_schemas = await convert_dynamo_nim_model_to_schema(session, [dynamo_nim])
dynamo_nim_schema = dynamo_nim_schemas[0]
dynamo_component_schemas = await convert_dynamo_component_model_to_schema(
session, [dynamo_component]
)
dynamo_component_schema = dynamo_component_schemas[0]
total_statement = (
select(DynamoNimVersion)
select(DynamoComponentVersion)
.where(
DynamoNimVersion.dynamo_nim_id == dynamo_nim.id,
DynamoComponentVersion.dynamo_component_id == dynamo_component.id,
)
.order_by(desc(DynamoNimVersion.created_at))
.order_by(desc(DynamoComponentVersion.created_at))
)
result = await session.exec(total_statement)
dynamo_nim_versions = result.all()
total = len(dynamo_nim_versions)
dynamo_component_versions = result.all()
total = len(dynamo_component_versions)
statement = total_statement.limit(query_params.count)
result = await session.exec(statement)
dynamo_nim_versions = list(result.all())
dynamo_component_versions = list(result.all())
dynamo_nim_version_schemas = await convert_dynamo_nim_version_model_to_schema(
session, dynamo_nim_versions, dynamo_nim
dynamo_component_version_schemas = (
await convert_dynamo_component_version_model_to_schema(
session, dynamo_component_versions, dynamo_component
)
)
items = [
DynamoNimVersionWithNimSchema(
**version.model_dump(), repository=dynamo_nim_schema
DynamoComponentVersionWithNimSchema(
**version.model_dump(), repository=dynamo_component_schema
)
for version in dynamo_nim_version_schemas
for version in dynamo_component_version_schemas
]
return DynamoNimVersionsWithNimListSchema(
return DynamoComponentVersionsWithNimListSchema(
total=total, count=query_params.count, start=query_params.start, items=items
)
@router.patch(
"/bento_repositories/{dynamo_nim_name}/bentos/{version}",
"/bento_repositories/{dynamo_component_name}/bentos/{version}",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -513,41 +552,43 @@ async def get_dynamo_nim_versions(
tags=[API_TAG_MODELS],
)
@router.patch(
"/dynamo_nims/{dynamo_nim_name}/versions/{version}",
"/dynamo_components/{dynamo_component_name}/versions/{version}",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def update_dynamo_nim_version(
async def update_dynamo_component_version(
*,
dynamo_nim_entities: tuple[DynamoNimVersion, DynamoNim] = GetDynamoNimVersion,
request: UpdateDynamoNimVersionRequest,
dynamo_component_entities: tuple[
DynamoComponentVersion, DynamoComponent
] = GetDynamoComponentVersion,
request: UpdateDynamoComponentVersionRequest,
session: AsyncSession = Depends(get_session),
):
dynamo_nim_version, _ = dynamo_nim_entities
dynamo_nim_version.manifest = request.manifest.model_dump()
dynamo_component_version, _ = dynamo_component_entities
dynamo_component_version.manifest = request.manifest.model_dump()
try:
session.add(dynamo_nim_version)
session.add(dynamo_component_version)
await session.flush()
await session.refresh(dynamo_nim_version)
await session.refresh(dynamo_component_version)
except SQLAlchemyError as e:
logger.error("Something went wrong with adding the Dynamo NIM")
logger.error("Something went wrong with adding the Dynamo Component")
raise HTTPException(status_code=500, detail=str(e))
logger.debug("Updating Dynamo NIM")
logger.debug("Updating Dynamo Component")
await session.commit()
schema = await convert_dynamo_nim_version_model_to_schema(
session, [dynamo_nim_version]
schema = await convert_dynamo_component_version_model_to_schema(
session, [dynamo_component_version]
)
return schema[0]
@router.put(
"/bento_repositories/{dynamo_nim_name}/bentos/{version}/upload",
"/bento_repositories/{dynamo_component_name}/bentos/{version}/upload",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -555,31 +596,33 @@ async def update_dynamo_nim_version(
tags=[API_TAG_MODELS],
)
@router.put(
"/dynamo_nims/{dynamo_nim_name}/versions/{version}/upload",
"/dynamo_components/{dynamo_component_name}/versions/{version}/upload",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def upload_dynamo_nim_version(
async def upload_dynamo_component_version(
*,
dynamo_nim_entities: tuple[DynamoNimVersion, DynamoNim] = GetDynamoNimVersion,
dynamo_component_entities: tuple[
DynamoComponentVersion, DynamoComponent
] = GetDynamoComponentVersion,
file: Annotated[bytes, Body()],
session: AsyncSession = Depends(get_session),
s3_storage: S3Storage = Depends(get_s3_storage),
):
dynamo_nim_version, dynamo_nim = dynamo_nim_entities
object_name = f"{dynamo_nim.name}/{dynamo_nim_version.version}"
dynamo_component_version, dynamo_component = dynamo_component_entities
object_name = f"{dynamo_component.name}/{dynamo_component_version.version}"
try:
s3_storage.upload_file(file, object_name)
dynamo_nim_version.upload_status = DynamoNimUploadStatus.Success
dynamo_nim_version.upload_finished_at = (
dynamo_component_version.upload_status = DynamoComponentUploadStatus.Success
dynamo_component_version.upload_finished_at = (
utc_now_naive()
) # datetime.now(timezone.utc)
session.add(dynamo_nim_version)
session.add(dynamo_component_version)
await session.commit()
return {"message": "File uploaded successfully"}
......@@ -593,7 +636,7 @@ def generate_file_path(version) -> str:
@router.get(
"/bento_repositories/{dynamo_nim_name}/bentos/{version}/download",
"/bento_repositories/{dynamo_component_name}/bentos/{version}/download",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -601,20 +644,22 @@ def generate_file_path(version) -> str:
tags=[API_TAG_MODELS],
)
@router.get(
"/dynamo_nims/{dynamo_nim_name}/versions/{version}/download",
"/dynamo_components/{dynamo_component_name}/versions/{version}/download",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def download_dynamo_nim_version(
async def download_dynamo_component_version(
*,
dynamo_nim_entities: tuple[DynamoNimVersion, DynamoNim] = GetDynamoNimVersion,
dynamo_component_entities: tuple[
DynamoComponentVersion, DynamoComponent
] = GetDynamoComponentVersion,
s3_storage: S3Storage = Depends(get_s3_storage),
):
dynamo_nim_version, dynamo_nim = dynamo_nim_entities
object_name = f"{dynamo_nim.name}/{dynamo_nim_version.version}"
dynamo_component_version, dynamo_component = dynamo_component_entities
object_name = f"{dynamo_component.name}/{dynamo_component_version.version}"
try:
file_data = s3_storage.download_file(object_name)
......@@ -627,7 +672,7 @@ async def download_dynamo_nim_version(
@router.patch(
"/bento_repositories/{dynamo_nim_name}/bentos/{version}/start_upload",
"/bento_repositories/{dynamo_component_name}/bentos/{version}/start_upload",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
......@@ -635,34 +680,36 @@ async def download_dynamo_nim_version(
tags=[API_TAG_MODELS],
)
@router.patch(
"/dynamo_nims/{dynamo_nim_name}/versions/{version}/start_upload",
"/dynamo_components/{dynamo_component_name}/versions/{version}/start_upload",
responses={
200: {"description": "Successful Response"},
422: {"description": "Validation Error"},
},
tags=[API_TAG_MODELS],
)
async def start_dynamo_nim_version_upload(
async def start_dynamo_component_version_upload(
*,
dynamo_nim_entities: tuple[DynamoNimVersion, DynamoNim] = GetDynamoNimVersion,
dynamo_component_entities: tuple[
DynamoComponentVersion, DynamoComponent
] = GetDynamoComponentVersion,
session: AsyncSession = Depends(get_session),
):
dynamo_nim_version, _ = dynamo_nim_entities
dynamo_nim_version.upload_status = DynamoNimUploadStatus.Uploading
dynamo_component_version, _ = dynamo_component_entities
dynamo_component_version.upload_status = DynamoComponentUploadStatus.Uploading
try:
session.add(dynamo_nim_version)
session.add(dynamo_component_version)
await session.flush()
await session.refresh(dynamo_nim_version)
await session.refresh(dynamo_component_version)
except SQLAlchemyError as e:
logger.error("Something went wrong with adding the Dynamo NIM")
logger.error("Something went wrong with adding the Dynamo Component")
raise HTTPException(status_code=500, detail=str(e))
logger.debug("Setting Dynamo NIM upload status to Uploading.")
logger.debug("Setting Dynamo Component upload status to Uploading.")
await session.commit()
schema = await convert_dynamo_nim_version_model_to_schema(
session, [dynamo_nim_version]
schema = await convert_dynamo_component_version_model_to_schema(
session, [dynamo_component_version]
)
return schema[0]
......@@ -677,23 +724,23 @@ async def health_check():
"""
async def convert_dynamo_nim_model_to_schema(
session: AsyncSession, entities: List[DynamoNim]
) -> List[DynamoNimSchema]:
dynamo_nim_schemas = []
async def convert_dynamo_component_model_to_schema(
session: AsyncSession, entities: List[DynamoComponent]
) -> List[DynamoComponentSchema]:
dynamo_component_schemas = []
for entity in entities:
try:
statement = (
select(DynamoNimVersion)
select(DynamoComponentVersion)
.where(
DynamoNimVersion.dynamo_nim_id == entity.id,
DynamoComponentVersion.dynamo_component_id == entity.id,
)
.order_by(desc(DynamoNimVersion.created_at))
.order_by(desc(DynamoComponentVersion.created_at))
.limit(DEFAULT_LIMIT)
)
total_statement = select(func.count(col(DynamoNimVersion.id))).where(
DynamoNimVersion.dynamo_nim_id == entity.id
total_statement = select(func.count(col(DynamoComponentVersion.id))).where(
DynamoComponentVersion.dynamo_component_id == entity.id
)
result = await session.exec(total_statement)
total = result.first()
......@@ -701,10 +748,10 @@ async def convert_dynamo_nim_model_to_schema(
total = 0
result = await session.exec(statement)
dynamo_nim_versions = list(result.all())
dynamo_nim_version_schemas = (
await convert_dynamo_nim_version_model_to_schema(
session, dynamo_nim_versions, entity
dynamo_component_versions = list(result.all())
dynamo_component_version_schemas = (
await convert_dynamo_component_version_model_to_schema(
session, dynamo_component_versions, entity
)
)
......@@ -713,47 +760,49 @@ async def convert_dynamo_nim_model_to_schema(
updated_at = make_aware(entity.updated_at)
deleted_at = make_aware(entity.deleted_at) if entity.deleted_at else None
dynamo_nim_schemas.append(
DynamoNimSchema(
dynamo_component_schemas.append(
DynamoComponentSchema(
uid=entity.id,
created_at=created_at,
updated_at=updated_at,
deleted_at=deleted_at,
name=entity.name,
resource_type=ResourceType.DynamoNim,
resource_type=ResourceType.DynamoComponent,
labels=[],
latest_bento=(
None
if not dynamo_nim_version_schemas
else dynamo_nim_version_schemas[0]
if not dynamo_component_version_schemas
else dynamo_component_version_schemas[0]
),
latest_bentos=dynamo_nim_version_schemas,
latest_bentos=dynamo_component_version_schemas,
n_bentos=total,
description=entity.description,
)
)
except SQLAlchemyError as e:
logger.error(
"Something went wrong with getting associated Dynamo NIM versions"
"Something went wrong with getting associated Dynamo Component versions"
)
raise HTTPException(status_code=500, detail=str(e))
return dynamo_nim_schemas
return dynamo_component_schemas
async def convert_dynamo_nim_version_model_to_schema(
async def convert_dynamo_component_version_model_to_schema(
session: AsyncSession,
entities: List[DynamoNimVersion],
dynamo_nim: Optional[DynamoNim] = None,
) -> List[DynamoNimVersionSchema]:
dynamo_nim_version_schemas = []
entities: List[DynamoComponentVersion],
dynamo_component: Optional[DynamoComponent] = None,
) -> List[DynamoComponentVersionSchema]:
dynamo_component_version_schemas = []
for entity in entities:
if not dynamo_nim:
statement = select(DynamoNim).where(DynamoNim.id == entity.dynamo_nim_id)
if not dynamo_component:
statement = select(DynamoComponent).where(
DynamoComponent.id == entity.dynamo_component_id
)
results = await session.exec(statement)
dynamo_nim = results.first()
dynamo_component = results.first()
if dynamo_nim:
if dynamo_component:
# Add timezone info for API responses
created_at = make_aware(utc_now_naive()) # make_aware(entity.created_at)
updated_at = make_aware(utc_now_naive()) # make_aware(entity.updated_at)
......@@ -770,30 +819,30 @@ async def convert_dynamo_nim_version_model_to_schema(
build_at = make_aware(utc_now_naive()) # make_aware(entity.build_at)
# description = entity.description or ""
dynamo_nim_version_schema = DynamoNimVersionSchema(
dynamo_component_version_schema = DynamoComponentVersionSchema(
description="",
version=entity.version,
image_build_status=entity.image_build_status,
upload_status=str(entity.upload_status.value),
upload_finished_reason=entity.upload_finished_reason,
uid=entity.id,
name=dynamo_nim.name,
name=dynamo_component.name,
created_at=created_at,
resource_type=ResourceType.DynamoNimVersion,
resource_type=ResourceType.DynamoComponentVersion,
labels=[],
manifest=entity.manifest,
updated_at=updated_at,
bento_repository_uid=dynamo_nim.id,
bento_repository_uid=dynamo_component.id,
# upload_started_at=upload_started_at,
# upload_finished_at=upload_finished_at,
transmission_strategy=TransmissionStrategy.Proxy,
build_at=build_at,
)
dynamo_nim_version_schemas.append(dynamo_nim_version_schema)
dynamo_component_version_schemas.append(dynamo_component_version_schema)
else:
raise HTTPException(
status_code=500, detail="Failed to find related Dynamo NIM"
status_code=500, detail="Failed to find related Dynamo Component"
) # Should never happen
return dynamo_nim_version_schemas
return dynamo_component_version_schemas
......@@ -73,7 +73,7 @@ def create_custom_resource(
def create_dynamo_deployment(
name: str,
namespace: str,
dynamo_nim: str,
dynamo_component: str,
labels: Dict[str, str],
envs: Optional[List[Dict[str, str]]] = None,
) -> Dict[str, Any]:
......@@ -83,7 +83,7 @@ def create_dynamo_deployment(
Args:
name: Deployment name
namespace: Target namespace
dynamo_nim: Bento name and version (format: name:version)
dynamo_component: Bento name and version (format: name:version)
labels: Resource labels
envs: Optional list of environment variables
......@@ -95,7 +95,7 @@ def create_dynamo_deployment(
"kind": "DynamoGraphDeployment",
"metadata": {"name": name, "namespace": namespace, "labels": labels},
"spec": {
"dynamoGraph": dynamo_nim,
"dynamoGraph": dynamo_component,
"services": {},
"envs": envs if envs else [],
},
......
......@@ -22,7 +22,7 @@ from sqlalchemy import Column, DateTime
from sqlmodel import Field as SQLField
from sqlmodel import UniqueConstraint
from .components import DynamoNimBase, DynamoNimVersionBase
from .components import DynamoComponentBase, DynamoComponentVersionBase
"""
This file stores all of the models/tables stored in the SQL database.
......@@ -72,12 +72,14 @@ def make_aware(dt: Optional[datetime]) -> Optional[datetime]:
return dt
class DynamoNimVersion(DynamoNimVersionBase, table=True):
"""A row in the dynamo nim table."""
class DynamoComponentVersion(DynamoComponentVersionBase, table=True):
"""A row in the dynamo component table."""
__tablename__ = "dynamonimversion"
__tablename__ = "dynamocomponentversion"
__table_args__ = (
UniqueConstraint("dynamo_nim_id", "version", name="version_unique_per_nim"),
UniqueConstraint(
"dynamo_component_id", "version", name="version_unique_per_component"
),
)
id: str = SQLField(default_factory=new_compound_entity_id, primary_key=True)
......@@ -95,13 +97,13 @@ class DynamoNimVersion(DynamoNimVersionBase, table=True):
# upload_finished_at: datetime = SQLField(sa_column=Column(DateTime, nullable=True))
build_at: datetime = SQLField(sa_column=Column(DateTime, nullable=False))
dynamo_nim_id: str = SQLField(foreign_key="dynamonim.id")
dynamo_component_id: str = SQLField(foreign_key="dynamocomponent.id")
class DynamoNim(DynamoNimBase, table=True):
"""A row in the dynamo nim table."""
class DynamoComponent(DynamoComponentBase, table=True):
"""A row in the dynamo component table."""
__tablename__ = "dynamonim"
__tablename__ = "dynamocomponent"
id: str = SQLField(default_factory=new_compound_entity_id, primary_key=True)
......
......@@ -56,7 +56,8 @@ class DeploymentConfigSchema(BaseModel):
class UpdateDeploymentSchema(DeploymentConfigSchema):
bento: str
bento: Optional[str] = None
component: Optional[str] = None
class CreateDeploymentSchema(UpdateDeploymentSchema):
......
......@@ -38,14 +38,14 @@ func NewApiStoreClient(endpoint string) *ApiStoreClient {
}
func (c *ApiStoreClient) GetDynamoComponent(ctx context.Context, name, version string) (component *schemas.DynamoComponent, err error) {
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s", name, version))
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_components/%s/versions/%s", name, version))
component = &schemas.DynamoComponent{}
_, err = DoJsonRequest(ctx, "GET", url_, nil, nil, nil, component, nil)
return
}
func (c *ApiStoreClient) PresignDynamoComponentDownloadURL(ctx context.Context, name, version string) (component *schemas.DynamoComponent, err error) {
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s/presign_download_url", name, version))
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_components/%s/versions/%s/presign_download_url", name, version))
component = &schemas.DynamoComponent{}
_, err = DoJsonRequest(ctx, "PATCH", url_, nil, nil, nil, component, nil)
return
......
......@@ -1000,7 +1000,7 @@ func (r *DynamoComponentReconciler) generateImageBuilderPodTemplateSpec(ctx cont
r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamoComponent %s from api store service", opt.DynamoComponent.Spec.DynamoComponent)
dynamoComponentDownloadURL = dynamoComponent_.PresignedDownloadUrl
} else {
dynamoComponentDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_nims/%s/versions/%s/download", apiStoreConf.Endpoint, dynamoComponentRepositoryName, dynamoComponentVersion)
dynamoComponentDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_components/%s/versions/%s/download", apiStoreConf.Endpoint, dynamoComponentRepositoryName, dynamoComponentVersion)
}
}
......
......@@ -154,7 +154,7 @@ func RetrieveDynamoGraphDownloadURL(ctx context.Context, dynamoDeployment *v1alp
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamo graph %s from api store service", dynamoDeployment.Spec.DynamoGraph)
dynamoGraphDownloadURL = dynamoComponent_.PresignedDownloadUrl
} else {
dynamoGraphDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_nims/%s/versions/%s/download", apiStoreConf.Endpoint, dynamoComponentRepositoryName, dynamoComponentVersion)
dynamoGraphDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_components/%s/versions/%s/download", apiStoreConf.Endpoint, dynamoComponentRepositoryName, dynamoComponentVersion)
}
return &dynamoGraphDownloadURL, nil
......
......@@ -18,33 +18,21 @@
from __future__ import annotations
import json
import logging
import re
import sys
import typing as t
from http import HTTPStatus
from typing import Any, Dict, List, Optional, TextIO
import typer
from bentoml._internal.cloud.base import Spinner
from bentoml._internal.cloud.client import RestApiClient
from bentoml._internal.cloud.config import CloudClientConfig, CloudClientContext
from bentoml._internal.cloud.deployment import Deployment, DeploymentConfigParameters
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml.exceptions import BentoMLException, CLIException, CloudRESTApiClientError
from rich.console import Console
from simple_di import Provide, inject
from dynamo.runtime.logging import configure_dynamo_logging
from .utils import resolve_service_config
# Configure logging to suppress INFO HTTP logs
logging.getLogger("httpx").setLevel(logging.WARNING) # HTTP client library logs
logging.getLogger("httpcore").setLevel(logging.WARNING) # HTTP core library logs
configure_dynamo_logging()
logger = logging.getLogger(__name__)
from rich.panel import Panel
from dynamo.sdk.cli.utils import resolve_service_config
from dynamo.sdk.core.deploy.bento_cloud import BentoCloudDeploymentManager
from dynamo.sdk.core.deploy.kubernetes import KubernetesDeploymentManager
from dynamo.sdk.core.protocol.deployment import (
Deployment,
DeploymentManager,
DeploymentResponse,
)
from dynamo.sdk.core.runner import TargetEnum
app = typer.Typer(
help="Deploy Dynamo applications to Dynamo Cloud Kubernetes Platform",
......@@ -54,59 +42,43 @@ app = typer.Typer(
console = Console(highlight=False)
if t.TYPE_CHECKING:
from bentoml._internal.cloud import BentoCloudClient
def raise_deployment_config_error(err: BentoMLException, action: str) -> t.NoReturn:
    """Re-raise a deployment configuration failure as a ``BentoMLException``.

    Args:
        err: The exception received from the cloud client; its ``error_code``
            is compared against ``HTTPStatus.UNAUTHORIZED``.
        action: Verb describing the attempted operation, inserted into the
            generic failure message.

    Raises:
        BentoMLException: Always. The original exception context is
            suppressed (``from None``).
    """
    if err.error_code == HTTPStatus.UNAUTHORIZED:
        # Unauthorized gets a dedicated hint about supplying an endpoint.
        message = f"{err}\n* Dynamo Cloud API token is required for authorization. Please provide a valid endpoint with --endpoint option."
    else:
        message = f"Failed to {action} deployment due to invalid configuration: {err}"
    raise BentoMLException(message) from None
def _get_urls(deployment: Deployment) -> List[str]:
    """Return the URLs of the freshly fetched deployment, or ``[]`` if none.

    The deployment is re-fetched through ``deployment._client.v2`` using its
    name and cluster; a missing or ``None`` ``urls`` attribute on the result
    yields an empty list.
    """
    refreshed = deployment._client.v2.get_deployment(
        deployment.name, deployment.cluster
    )
    found = getattr(refreshed, "urls", None)
    if found is None:
        return []
    return found
def get_deployment_manager(target: str, endpoint: str) -> DeploymentManager:
    """Build the deployment manager that matches ``target``.

    Args:
        target: Either ``"kubernetes"`` or ``"bento_cloud"``.
        endpoint: Passed unchanged to the selected manager's constructor.

    Returns:
        A ``KubernetesDeploymentManager`` or ``BentoCloudDeploymentManager``.

    Raises:
        ValueError: If ``target`` is neither of the supported values.
    """
    if target == "kubernetes":
        return KubernetesDeploymentManager(endpoint)
    if target == "bento_cloud":
        return BentoCloudDeploymentManager(endpoint)
    raise ValueError(f"Unknown deployment target: {target}")
def _display_deployment_info(spinner: Spinner, deployment: Deployment) -> None:
    """Log a deployment's status and ingress URLs through ``spinner``.

    The status comes from ``deployment._schema.status`` (``"unknown"`` when
    falsy) with ``[`` escaped so it is not read as Rich markup. URLs are
    fetched via ``_get_urls``; if anything in that path raises, the URLs
    already held in ``deployment._urls`` are logged instead.
    """

    def _log_urls(candidates) -> None:
        # Shared by the fresh-fetch path and the fallback path.
        if not candidates:
            spinner.log(" No URLs available")
            return
        for item in candidates:
            spinner.log(f" - {item}")

    current = deployment._schema.status or "unknown"
    # Escape any characters that are interpreted as markup
    safe_status = current.replace("[", "\\[")
    spinner.log(f"[bold]Status:[/] {safe_status}")

    spinner.log("[bold]Ingress URLs:[/]")
    try:
        # Get latest deployment info for URLs
        _log_urls(_get_urls(deployment))
    except Exception:
        # If refresh fails, fall back to existing URLs
        _log_urls(deployment._urls)
def display_deployment_info(
    deployment_manager: DeploymentManager, deployment: DeploymentResponse
) -> None:
    """Display deployment summary, status, and endpoint URLs in a rich panel.

    Args:
        deployment_manager: Queried by deployment name for the current status
            (``get_status``) and endpoint URLs (``get_endpoint_urls``).
        deployment: Mapping-like response. The display name is the first
            truthy value among its ``"name"``, ``"uid"`` and ``"id"`` keys;
            ``"created_at"`` is shown when present and non-empty.

    Note:
        All server-provided values are escaped before being embedded in Rich
        markup, so a literal ``[`` in a name, status, timestamp or URL is
        printed verbatim instead of being parsed as a style tag.
    """
    # Local import: rich is already a dependency of this module (Console, Panel).
    from rich.markup import escape

    name = deployment.get("name") or deployment.get("uid") or deployment.get("id")
    status = deployment_manager.get_status(name)
    urls = deployment_manager.get_endpoint_urls(name)
    created_at = deployment.get("created_at", "")

    # status.color is used as a style tag and is therefore NOT escaped.
    summary = (
        f"[white]Name:[/] [cyan]{escape(str(name))}[/]\n"
        f"[white]Status:[/] [{status.color}]{escape(str(status.value))}[/]"
    )
    if created_at:
        summary += f"\n[white]Created:[/] [magenta]{escape(str(created_at))}[/]"
    if urls:
        joined_urls = " | ".join(escape(url) for url in urls)
        summary += f"\n[white]URLs:[/] [blue]{joined_urls}[/]"
    else:
        summary += "\n[white]URLs:[/] [blue]None[/]"
    console.print(Panel(summary, title="Deployment", style="cyan"))
def _build_env_dicts(
config_file: Optional[TextIO] = None,
args: Optional[list[str]] = None,
envs: Optional[list[str]] = None,
) -> list[dict]:
config_file: t.Optional[t.TextIO] = None,
args: t.Optional[t.List[str]] = None,
envs: t.Optional[t.List[str]] = None,
) -> t.List[dict]:
"""
Build a list of environment variable dicts from config file, args, and env strings.
......@@ -114,7 +86,6 @@ def _build_env_dicts(
config_file: Optional configuration file
args: Optional list of extra arguments
envs: Optional list of environment variable strings (KEY=VALUE)
Returns:
List of dicts suitable for use as envs
"""
......@@ -122,246 +93,135 @@ def _build_env_dicts(
env_dicts = []
if service_configs:
config_json = json.dumps(service_configs)
logger.info(f"Deployment service configuration: {config_json}")
env_dicts.append({"name": "DYN_DEPLOYMENT_CONFIG", "value": config_json})
if envs:
for env in envs:
if "=" not in env:
raise CLIException(f"Invalid env format: {env}. Use KEY=VALUE.")
raise RuntimeError(f"Invalid env format: {env}. Use KEY=VALUE.")
key, value = env.split("=", 1)
env_dicts.append({"name": key, "value": value})
return env_dicts
@inject
def create_deployment(
pipeline: Optional[str] = None,
name: Optional[str] = None,
config_file: Optional[TextIO] = None,
wait: bool = True,
timeout: int = 3600,
dev: bool = False,
args: Optional[List[str]] = None,
envs: Optional[List[str]] = None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> Deployment:
# Build env_dicts from config_file, args, and envs
env_dicts = _build_env_dicts(config_file=config_file, args=args, envs=envs)
config_params = DeploymentConfigParameters(
name=name,
bento=pipeline,
envs=env_dicts,
secrets=None,
cli=True,
dev=dev,
)
try:
config_params.verify()
except BentoMLException as e:
print(f"Error: {str(e)}")
sys.exit(1)
with Spinner(console=console) as spinner:
try:
# Create deployment with initial status message
spinner.update("Creating deployment on Dynamo Cloud...")
deployment = _cloud_client.deployment.create(
deployment_config_params=config_params
)
deployment.admin_console = _get_urls(deployment) # remove dashboard url
spinner.log(
f':white_check_mark: Created deployment "{deployment.name}" in cluster "{deployment.cluster}"'
)
if wait:
# Update spinner text for waiting phase
spinner.log(
"[bold blue]Waiting for deployment to be ready, you can use --no-wait to skip this process[/]"
)
retcode = deployment.wait_until_ready(timeout=timeout, spinner=spinner)
if retcode != 0:
sys.exit(retcode)
def _handle_deploy_create(
ctx: typer.Context,
pipeline: str = typer.Argument(..., help="Dynamo pipeline to deploy"),
name: t.Optional[str] = typer.Option(None, "--name", "-n", help="Deployment name"),
config_file: t.Optional[typer.FileText] = typer.Option(
None, "--config-file", "-f", help="Configuration file path"
),
wait: bool = typer.Option(
True, "--wait/--no-wait", help="Do not wait for deployment to be ready"
),
timeout: int = typer.Option(
3600, "--timeout", help="Timeout for deployment to be ready in seconds"
),
endpoint: str = typer.Option(
..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD"
),
envs: t.Optional[t.List[str]] = typer.Option(
None,
"--env",
help="Environment variable(s) to set (format: KEY=VALUE). Note: These environment variables will be set on ALL services in your Dynamo pipeline.",
),
target: str = typer.Option(..., "--target", "-t", help="Deployment target"),
dev: bool = typer.Option(False, "--dev", help="Development mode for deployment"),
) -> DeploymentResponse:
"""Handle deployment creation. This is a helper function for the create and deploy commands.
_display_deployment_info(spinner, deployment)
return deployment
Args:
ctx: typer context
pipeline: pipeline to deploy
name: name of the deployment
"""
except BentoMLException as e:
error_msg = str(e)
if "already exists" in error_msg:
# Extract deployment name from error message and clean it
match = re.search(r'"([^"]+?)(?:\\+)?" already exists', error_msg)
dep_name = match.group(1).rstrip("\\") if match else name
spinner.log(
"[red]:x: Error:[/] "
f'Deployment "{dep_name}" already exists. To create a new deployment:\n'
" 1. Use a different name with the --name flag\n"
f" 2. Or delete the existing deployment with: dynamo deployment delete {dep_name}"
)
sys.exit(1)
spinner.log(f"[red]:x: Error:[/] {str(e)}")
sys.exit(1)
@inject
def update_deployment(
name: str,
config_file: Optional[TextIO] = None,
args: Optional[List[str]] = None,
envs: Optional[List[str]] = None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> Deployment:
"""Update an existing deployment on Dynamo Cloud.
from dynamo.sdk.cli.utils import configure_target_environment
from dynamo.sdk.lib.loader import load_entry_service
Args:
name: The name of the deployment to update
config_file: Optional configuration file for the update
args: Optional extra arguments for config
envs: Optional list of environment variables (KEY=VALUE)
# TODO: hardcoding this is a hack to get the services for the deployment
# we should find a better way to do this once build is finished/generic
configure_target_environment(TargetEnum.BENTO)
entry_service = load_entry_service(pipeline)
Returns:
Deployment: The updated deployment object
"""
# Build env_dicts from config_file, args, and envs
env_dicts = _build_env_dicts(config_file=config_file, args=args, envs=envs)
config_params = DeploymentConfigParameters(
name=name,
deployment_manager = get_deployment_manager(target, endpoint)
env_dicts = _build_env_dicts(config_file=config_file, args=ctx.args, envs=envs)
deployment = Deployment(
name=name or (pipeline if pipeline else "unnamed-deployment"),
namespace="default",
pipeline=pipeline,
entry_service=entry_service,
envs=env_dicts,
cli=True,
)
try:
config_params.verify(create=False)
except BentoMLException as e:
print(f"Error: {str(e)}")
sys.exit(1)
with Spinner(console=console) as spinner:
try:
spinner.update(f'Updating deployment "{name}" on Dynamo Cloud...')
deployment = _cloud_client.deployment.update(
deployment_config_params=config_params
)
spinner.log(
f':white_check_mark: Updated deployment "{deployment.name}" in cluster "{deployment.cluster}"'
)
spinner.log(
"[yellow]Update submitted. It may take a short time for the new pods to become active. Please wait a bit before accessing the deployment to ensure your changes are live.[/yellow]"
)
_display_deployment_info(spinner, deployment)
return deployment
except BentoMLException as e:
spinner.log(f"[red]:x: Error:[/] Failed to update deployment: {str(e)}")
sys.exit(1)
@inject
def get_deployment(
name: str,
cluster: Optional[str] = None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> Deployment:
"""Get deployment details from Dynamo Cloud."""
with Spinner(console=console) as spinner:
try:
spinner.update(f'Getting deployment "{name}" from Dynamo Cloud...')
deployment = _cloud_client.deployment.get(name=name, cluster=cluster)
spinner.log(
f':white_check_mark: Found deployment "{deployment.name}" in cluster "{deployment.cluster}"'
console.print("[bold green]Creating deployment...")
deployment = deployment_manager.create_deployment(
deployment,
dev=dev,
)
console.print(f"[bold green]Deployment '{name}' created.")
if wait:
deployment, ready = deployment_manager.wait_until_ready(
name, timeout=timeout
)
_display_deployment_info(spinner, deployment)
return deployment
except BentoMLException as e:
error_msg = str(e)
if "No cloud context default found" in error_msg:
spinner.log(
"[red]:x: Error:[/] Not logged in to Dynamo Cloud. Please provide a valid endpoint with --endpoint option."
if ready:
console.print(
Panel(
f"Deployment [bold]{name}[/] is [green]ready[/]",
title="Status",
)
)
sys.exit(1)
if "404 Not Found" in error_msg or "Deployment not found" in error_msg:
cluster_msg = f" in cluster {cluster}" if cluster else ""
spinner.log(f"[red]:x: Deployment '{name}' not found{cluster_msg}")
sys.exit(1)
spinner.log(f"[red]:x: Error:[/] Failed to get deployment: {error_msg}")
sys.exit(1)
@inject
def delete_deployment(
name: str,
cluster: Optional[str] = None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> None:
"""Delete a deployment from Dynamo Cloud."""
with Spinner(console=console) as spinner:
try:
spinner.update(f'Deleting deployment "{name}" from Dynamo Cloud...')
_cloud_client.deployment.delete(name=name, cluster=cluster)
spinner.log(f':white_check_mark: Successfully deleted deployment "{name}"')
except BentoMLException as e:
error_msg = str(e)
if "No cloud context default found" in error_msg:
spinner.log(
"[red]:x: Error:[/] Not logged in to Dynamo Cloud. Please provide a valid endpoint with --endpoint option."
else:
console.print(
Panel(
f"Deployment [bold]{name}[/] did not become ready in time.",
title="Status",
style="red",
)
)
sys.exit(1)
if "404 Not Found" in error_msg or "Deployment not found" in error_msg:
cluster_msg = f" in cluster {cluster}" if cluster else ""
spinner.log(f"[red]:x: Deployment '{name}' not found{cluster_msg}")
sys.exit(1)
spinner.log(f"[red]:x: Error:[/] {error_msg}")
sys.exit(1)
@inject
def list_deployments(
cluster: Optional[str] = None,
search: Optional[str] = None,
dev: bool = False,
q: Optional[str] = None,
labels: Optional[List[Dict[str, Any]]] = None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> None:
"""List all deployments from Dynamo Cloud."""
with Spinner(console=console) as spinner:
try:
# Handle label-based filtering
if labels is not None:
label_query = " ".join(f"label:{d['key']}={d['value']}" for d in labels)
if q is not None:
q = f"{q} {label_query}"
else:
q = label_query
spinner.update("Getting deployments from Dynamo Cloud...")
# Get all deployments in a single call by setting count=1000
deployments = _cloud_client.deployment.list(
cluster=cluster, search=search, dev=dev, q=q
)
if not deployments:
spinner.log("No deployments found")
return
spinner.log(":white_check_mark: Found deployments:")
for deployment in deployments:
spinner.log(f"\n{deployment.name} (cluster: {deployment.cluster})")
_display_deployment_info(spinner, deployment)
except BentoMLException as e:
if "No cloud context default found" in str(e):
spinner.log(
"[red]:x: Error:[/] Not logged in to Dynamo Cloud. Please provide a valid endpoint with --endpoint option."
display_deployment_info(deployment_manager, deployment)
return deployment
except Exception as e:
if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple):
status, msg, url = e.args[0]
if status == 409:
console.print(
Panel(
f"Cannot create deployment because deployment with name '{name}' already exists.",
title="Error",
style="red",
)
)
sys.exit(1)
spinner.log(f"[red]:x: Error:[/] Failed to list deployments: {str(e)}")
sys.exit(1)
elif status in (400, 422):
console.print(
Panel(f"Validation error:\n{msg}", title="Error", style="red")
)
elif status == 404:
console.print(
Panel(f"Not found: {url} \n{msg}", title="Error", style="red")
)
elif status == 500:
console.print(
Panel(f"Internal server error:\n{msg}", title="Error", style="red")
)
else:
console.print(
Panel(
f"Failed to create deployment:\n{msg}",
title="Error",
style="red",
)
)
else:
console.print(Panel(str(e), title="Error", style="red"))
raise typer.Exit(1)
@app.command()
def create(
ctx: typer.Context,
pipeline: Optional[str] = typer.Argument(..., help="Dynamo pipeline to deploy"),
name: Optional[str] = typer.Option(..., "--name", "-n", help="Deployment name"),
config_file: Optional[typer.FileText] = typer.Option(
pipeline: str = typer.Argument(..., help="Dynamo pipeline to deploy"),
name: t.Optional[str] = typer.Option(None, "--name", "-n", help="Deployment name"),
config_file: t.Optional[typer.FileText] = typer.Option(
None, "--config-file", "-f", help="Configuration file path"
),
wait: bool = typer.Option(
......@@ -373,75 +233,108 @@ def create(
endpoint: str = typer.Option(
..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD"
),
envs: Optional[List[str]] = typer.Option(
envs: t.Optional[t.List[str]] = typer.Option(
None,
"--env",
help="Environment variable(s) to set (format: KEY=VALUE). Note: These environment variables will be set on ALL services in your Dynamo pipeline.",
),
) -> None:
"""Create a deployment on Dynamo Cloud.
Create a deployment using parameters, or using config yaml file.
"""
login_to_cloud(endpoint)
create_deployment(
pipeline=pipeline,
name=name,
config_file=config_file,
wait=wait,
timeout=timeout,
args=ctx.args if hasattr(ctx, "args") else None,
envs=envs,
target: str = typer.Option(..., "--target", "-t", help="Deployment target"),
dev: bool = typer.Option(False, "--dev", help="Development mode for deployment"),
) -> DeploymentResponse:
"""Create a deployment on Dynamo Cloud."""
return _handle_deploy_create(
ctx, pipeline, name, config_file, wait, timeout, endpoint, envs, target, dev
)
@app.command()
def get(
name: str = typer.Argument(..., help="Deployment name"),
cluster: Optional[str] = typer.Option(None, "--cluster", help="Cluster name"),
target: str = typer.Option(..., "--target", "-t", help="Deployment target"),
endpoint: str = typer.Option(
..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD"
),
) -> None:
"""Get deployment details from Dynamo Cloud.
Get deployment details by name.
"""
login_to_cloud(endpoint)
get_deployment(name, cluster=cluster)
) -> DeploymentResponse:
"""Get details for a specific deployment by name."""
deployment_manager = get_deployment_manager(target, endpoint)
try:
with console.status(f"[bold green]Getting deployment '{name}'..."):
deployment = deployment_manager.get_deployment(name)
display_deployment_info(deployment_manager, deployment)
return deployment
except Exception as e:
if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple):
status, msg, url = e.args[0]
if status == 404:
console.print(
Panel(
f"Deployment '{name}' not found.\n{msg}",
title="Error",
style="red",
)
)
else:
console.print(
Panel(
f"Failed to get deployment:\n{msg}", title="Error", style="red"
)
)
else:
console.print(Panel(str(e), title="Error", style="red"))
raise typer.Exit(1)
@app.command("list")
def list_deployments_command(
cluster: Optional[str] = typer.Option(None, "--cluster", help="Cluster name"),
search: Optional[str] = typer.Option(None, "--search", help="Search query"),
dev: bool = typer.Option(False, "--dev", help="List development deployments"),
query: Optional[str] = typer.Option(
None, "--query", "-q", help="Advanced query string"
),
def list_deployments(
target: str = typer.Option(..., "--target", "-t", help="Deployment target"),
endpoint: str = typer.Option(
...,
"--endpoint",
"-e",
help="Dynamo Cloud endpoint",
envvar="DYNAMO_CLOUD",
..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD"
),
) -> None:
"""List all deployments from Dynamo Cloud.
List and filter deployments.
"""
login_to_cloud(endpoint)
list_deployments(cluster=cluster, search=search, dev=dev, q=query)
"""List all deployments."""
deployment_manager = get_deployment_manager(target, endpoint)
try:
with console.status("[bold green]Listing deployments..."):
deployments = deployment_manager.list_deployments()
if not deployments:
console.print(
Panel("No deployments found.", title="Deployments", style="yellow")
)
else:
console.print(Panel("[bold]Deployments List[/]", style="blue"))
for dep in deployments:
display_deployment_info(deployment_manager, dep)
except Exception as e:
if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple):
status, msg, url = e.args[0]
if status == 404:
console.print(
Panel(
f"Endpoint not found: {url}\n{msg}", title="Error", style="red"
)
)
else:
console.print(
Panel(
f"Failed to list deployments:\n{msg}",
title="Error",
style="red",
)
)
else:
console.print(Panel(str(e), title="Error", style="red"))
raise typer.Exit(1)
@app.command()
def update(
ctx: typer.Context,
name: str = typer.Argument(..., help="Deployment name to update"),
config_file: Optional[typer.FileText] = typer.Option(
target: str = typer.Option(..., "--target", "-t", help="Deployment target"),
config_file: t.Optional[typer.FileText] = typer.Option(
None, "--config-file", "-f", help="Configuration file path"
),
envs: Optional[List[str]] = typer.Option(
envs: t.Optional[t.List[str]] = typer.Option(
None,
"--env",
help="Environment variable(s) to set (format: KEY=VALUE). Note: These environment variables will be set on ALL services in your Dynamo pipeline.",
......@@ -454,39 +347,95 @@ def update(
Update a deployment using parameters or a config yaml file.
"""
login_to_cloud(endpoint)
update_deployment(
name=name,
config_file=config_file,
envs=envs,
)
deployment_manager = get_deployment_manager(target, endpoint)
try:
with console.status(f"[bold green]Updating deployment '{name}'..."):
env_dicts = _build_env_dicts(
config_file=config_file, args=ctx.args, envs=envs
)
deployment = Deployment(
name=name,
namespace="default",
envs=env_dicts,
)
deployment_manager.update_deployment(
deployment_id=name, deployment=deployment
)
console.print(
Panel(
"[yellow]Update submitted. It may take a short time for the new pods to become active. Please wait a bit before accessing the deployment to ensure your changes are live.[/yellow]",
title="Status",
)
)
except Exception as e:
if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple):
status, msg, url = e.args[0]
if status == 404:
console.print(
Panel(
f"Deployment '{name}' not found.\n{msg}",
title="Error",
style="red",
)
)
else:
console.print(
Panel(
f"Failed to update deployment:\n{msg}",
title="Error",
style="red",
)
)
else:
console.print(Panel(str(e), title="Error", style="red"))
raise typer.Exit(1)
@app.command()
def delete(
name: str = typer.Argument(..., help="Deployment name"),
cluster: Optional[str] = typer.Option(None, "--cluster", help="Cluster name"),
target: str = typer.Option(..., "--target", "-t", help="Deployment target"),
endpoint: str = typer.Option(
...,
"--endpoint",
"-e",
help="Dynamo Cloud endpoint",
envvar="DYNAMO_CLOUD",
..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD"
),
) -> None:
"""Delete a deployment from Dynamo Cloud.
Delete deployment by name.
"""
login_to_cloud(endpoint)
delete_deployment(name, cluster=cluster)
"""Delete a deployment by name."""
deployment_manager = get_deployment_manager(target, endpoint)
try:
with console.status(f"[bold green]Deleting deployment '{name}'..."):
deployment_manager.delete_deployment(name)
console.print(
Panel(f"Deleted deployment {name}", title="Success", style="green")
)
except Exception as e:
if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple):
status, msg, url = e.args[0]
if status == 404:
console.print(
Panel(
f"Deployment '{name}' not found.",
title="Error",
style="red",
)
)
else:
console.print(
Panel(
f"Failed to delete deployment:\n{msg}",
title="Error",
style="red",
)
)
else:
console.print(Panel(str(e), title="Error", style="red"))
raise typer.Exit(1)
def deploy(
ctx: typer.Context,
pipeline: Optional[str] = typer.Argument(..., help="Dynamo pipeline to deploy"),
name: Optional[str] = typer.Option(..., "--name", "-n", help="Deployment name"),
config_file: Optional[typer.FileText] = typer.Option(
pipeline: str = typer.Argument(..., help="Dynamo pipeline to deploy"),
name: t.Optional[str] = typer.Option(None, "--name", "-n", help="Deployment name"),
config_file: t.Optional[typer.FileText] = typer.Option(
None, "--config-file", "-f", help="Configuration file path"
),
wait: bool = typer.Option(
......@@ -496,69 +445,17 @@ def deploy(
3600, "--timeout", help="Timeout for deployment to be ready in seconds"
),
endpoint: str = typer.Option(
...,
"--endpoint",
"-e",
help="Dynamo Cloud endpoint",
envvar="DYNAMO_CLOUD",
..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD"
),
) -> None:
"""Create a deployment on Dynamo Cloud.
Create a deployment using parameters, or using config yaml file.
"""
login_to_cloud(endpoint)
create_deployment(
pipeline=pipeline,
name=name,
config_file=config_file,
wait=wait,
timeout=timeout,
args=ctx.args if hasattr(ctx, "args") else None,
envs: t.Optional[t.List[str]] = typer.Option(
None,
"--env",
help="Environment variable(s) to set (format: KEY=VALUE). Note: These environment variables will be set on ALL services in your Dynamo pipeline.",
),
target: str = typer.Option(..., "--target", "-t", help="Deployment target"),
dev: bool = typer.Option(False, "--dev", help="Development mode for deployment"),
) -> DeploymentResponse:
"""Deploy a Dynamo pipeline (same as deployment create)."""
return _handle_deploy_create(
ctx, pipeline, name, config_file, wait, timeout, endpoint, envs, target, dev
)
def login_to_cloud(endpoint: str) -> None:
    """Check the Dynamo Cloud endpoint and save a cloud context for it.

    Looks up the current user and organization through the REST client, then
    saves a ``CloudClientContext`` built from the endpoint and the user's email.
    Progress goes to the module logger; failures are printed to the console and
    re-raised as ``BentoMLException``.

    Args:
        endpoint: Base URL of the Dynamo Cloud API.

    Raises:
        BentoMLException: If the lookup, the context save, or the connection fails.
    """
    try:
        logger.info(f"Running against Dynamo Cloud at {endpoint}")
        api_token = ""  # Using empty string for now as it's not used
        rest_client = RestApiClient(endpoint, api_token)

        current_user = rest_client.v1.get_current_user()
        if current_user is None:
            raise CLIException("current user is not found")

        organization = rest_client.v1.get_current_organization()
        if organization is None:
            raise CLIException("current organization is not found")

        # Prefer the context configured in the container; otherwise fall back to
        # the name recorded in the client config.
        fallback_name = CloudClientConfig.get_config().current_context_name
        context_name = BentoMLContainer.cloud_context.get()
        if context_name is None:
            context_name = fallback_name

        ctx = CloudClientContext(
            name=context_name,
            endpoint=endpoint,
            api_token=api_token,
            email=current_user.email,
        )
        ctx.save()

        logger.debug(
            f"Configured Dynamo Cloud credentials (current-context: {ctx.name})"
        )
        logger.debug(
            f"Logged in as {current_user.email} at {organization.name} organization"
        )
    except CloudRESTApiClientError as e:
        if e.error_code != 401:
            console.print(
                f":police_car_light: Error validating token: HTTP {e.error_code}"
            )
        else:
            console.print(
                f":police_car_light: Error validating token: HTTP 401: Bad credentials ({endpoint}/api-token)"
            )
        raise BentoMLException(f"Failed to login to Dynamo Cloud: {str(e)}") from e
    except Exception as e:
        console.print(f":police_car_light: Error connecting to Dynamo Cloud: {str(e)}")
        raise BentoMLException(f"Failed to login to Dynamo Cloud: {str(e)}") from e
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import typing as t
from bentoml._internal.cloud import BentoCloudClient
from bentoml._internal.cloud.client import RestApiClient
from bentoml._internal.cloud.config import CloudClientConfig, CloudClientContext
from bentoml._internal.cloud.deployment import DeploymentConfigParameters
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml.exceptions import BentoMLException, CLIException, CloudRESTApiClientError
from rich.console import Console
from dynamo.runtime.logging import configure_dynamo_logging
from dynamo.sdk.core.protocol.deployment import Deployment as ProtocolDeployment
from dynamo.sdk.core.protocol.deployment import (
DeploymentManager,
DeploymentResponse,
DeploymentStatus,
)
# Configure logging to suppress INFO HTTP logs
logging.getLogger("httpx").setLevel(logging.WARNING) # HTTP client library logs
logging.getLogger("httpcore").setLevel(logging.WARNING) # HTTP core library logs
configure_dynamo_logging()
logger = logging.getLogger(__name__)
console = Console(highlight=False)
class BentoCloudDeploymentManager(DeploymentManager):
    """
    Implementation of DeploymentManager that talks to the BentoCloud deployment API.
    Handles all BentoCloud-specific config parameter building, error handling, and API calls.
    Accepts **kwargs for backend-specific options.
    Raises exceptions for errors; CLI handles user interaction.

    The create/update/get/list/delete methods translate ``BentoMLException``
    into ``RuntimeError`` whose single argument is a ``(status, message, url)``
    tuple; ``url`` is always ``None`` for this backend.
    """

    def __init__(self, endpoint: str):
        """Store the endpoint (trailing slashes stripped) and log in to Dynamo Cloud.

        Raises:
            BentoMLException: If the login performed by ``_login_to_cloud`` fails.
        """
        self.endpoint = endpoint.rstrip("/")
        self._cloud_client = self._login_to_cloud()

    def _login_to_cloud(self) -> "BentoCloudClient":
        """Connect to Dynamo Cloud and return an authenticated BentoCloudClient.

        Raises:
            BentoMLException: If the user/organization lookup, the context save,
                or the connection itself fails. The error is also printed to
                the console.
        """
        try:
            logger.info(f"Running against Dynamo Cloud at {self.endpoint}")
            api_token = ""  # Using empty string for now as it's not used
            cloud_rest_client = RestApiClient(self.endpoint, api_token)
            user = cloud_rest_client.v1.get_current_user()
            if user is None:
                raise CLIException("current user is not found")
            org = cloud_rest_client.v1.get_current_organization()
            if org is None:
                raise CLIException("current organization is not found")
            current_context_name = CloudClientConfig.get_config().current_context_name
            cloud_context = BentoMLContainer.cloud_context.get()
            ctx = CloudClientContext(
                name=cloud_context
                if cloud_context is not None
                else current_context_name,
                endpoint=self.endpoint,
                api_token=api_token,
                email=user.email,
            )
            ctx.save()
            logger.debug(
                f"Configured Dynamo Cloud credentials (current-context: {ctx.name})"
            )
            logger.debug(f"Logged in as {user.email} at {org.name} organization")
            return BentoCloudClient(endpoint=self.endpoint, api_key=api_token)
        except CloudRESTApiClientError as e:
            if e.error_code == 401:
                console.print(
                    f":police_car_light: Error validating token: HTTP 401: Bad credentials ({self.endpoint}/api-token)"
                )
            else:
                console.print(
                    f":police_car_light: Error validating token: HTTP {e.error_code}"
                )
            raise BentoMLException(f"Failed to login to Dynamo Cloud: {str(e)}") from e
        except Exception as e:
            console.print(
                f":police_car_light: Error connecting to Dynamo Cloud: {str(e)}"
            )
            raise BentoMLException(f"Failed to login to Dynamo Cloud: {str(e)}") from e

    def create_deployment(
        self, deployment: ProtocolDeployment, **kwargs
    ) -> DeploymentResponse:
        """Create a deployment from ``deployment`` and return it as a dict.

        Args:
            deployment: Deployment description; ``pipeline`` is used as the
                bento tag, falling back to ``namespace`` when it is unset.
            **kwargs: ``dev`` (bool, default ``False``) is forwarded to the
                deployment config parameters.

        Raises:
            RuntimeError: ``(400, msg, None)`` when config verification fails,
                ``(409, msg, None)`` when the error text contains
                "already exists", ``(500, msg, None)`` otherwise.
        """
        config_params = DeploymentConfigParameters(
            name=deployment.name,
            bento=deployment.pipeline or deployment.namespace,
            envs=deployment.envs,
            secrets=None,
            cli=True,
            dev=kwargs.get("dev", False),
        )
        try:
            config_params.verify()
        except BentoMLException as e:
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(
                (400, f"Config verification error: {str(e)}", None)
            ) from e
        try:
            deployment_obj = self._cloud_client.deployment.create(
                deployment_config_params=config_params
            )
            return deployment_obj.to_dict()
        except BentoMLException as e:
            error_msg = str(e)
            if "already exists" in error_msg:
                raise RuntimeError((409, error_msg, None)) from e
            raise RuntimeError((500, error_msg, None)) from e

    def update_deployment(
        self, deployment_id: str, deployment: ProtocolDeployment
    ) -> DeploymentResponse:
        """Update the environment variables of the deployment named ``deployment_id``.

        Only ``deployment.envs`` is read from ``deployment``.

        Raises:
            RuntimeError: ``(400, msg, None)`` when config verification fails,
                ``(500, msg, None)`` when the update call fails.
        """
        config_params = DeploymentConfigParameters(
            name=deployment_id,
            envs=deployment.envs,
            cli=True,
        )
        try:
            config_params.verify(create=False)
        except BentoMLException as e:
            raise RuntimeError(
                (400, f"Config verification error: {str(e)}", None)
            ) from e
        try:
            # Use a separate name so the ``deployment`` argument is not rebound.
            updated = self._cloud_client.deployment.update(
                deployment_config_params=config_params
            )
            return updated.to_dict()
        except BentoMLException as e:
            raise RuntimeError((500, f"Deployment update error: {str(e)}", None)) from e

    def get_deployment(self, deployment_id: str) -> DeploymentResponse:
        """Return the deployment named ``deployment_id`` as a dict.

        Raises:
            RuntimeError: ``(404, msg, None)`` for any ``BentoMLException``.
        """
        try:
            deployment_obj = self._cloud_client.deployment.get(name=deployment_id)
            return deployment_obj.to_dict()
        except BentoMLException as e:
            raise RuntimeError((404, str(e), None)) from e

    def list_deployments(self) -> list[DeploymentResponse]:
        """Return all deployments, each converted to a dict.

        Raises:
            RuntimeError: ``(500, msg, None)`` for any ``BentoMLException``.
        """
        try:
            deployments = self._cloud_client.deployment.list()
            return [
                d.to_dict() if hasattr(d, "to_dict") else vars(d) for d in deployments
            ]
        except BentoMLException as e:
            raise RuntimeError((500, str(e), None)) from e

    def delete_deployment(self, deployment_id: str) -> None:
        """Delete the deployment named ``deployment_id``.

        Raises:
            RuntimeError: ``(404, msg, None)`` for any ``BentoMLException``.
        """
        try:
            self._cloud_client.deployment.delete(name=deployment_id)
        except BentoMLException as e:
            raise RuntimeError((404, str(e), None)) from e

    def get_status(
        self,
        deployment_id: str,
    ) -> DeploymentStatus:
        """Map the backend status of ``deployment_id`` onto ``DeploymentStatus``.

        Any status other than "running", "failed", "deploying" or "terminated"
        (including a missing one) is reported as ``PENDING``.
        """
        dep = self._cloud_client.deployment.get(deployment_id)
        status = dep._schema.status if dep._schema.status else "unknown"
        # Escape any characters that are interpreted as markup
        status = status.replace("[", "\\[")
        if status == "running":
            return DeploymentStatus.RUNNING
        elif status == "failed":
            return DeploymentStatus.FAILED
        elif status == "deploying":
            return DeploymentStatus.IN_PROGRESS
        elif status == "terminated":
            return DeploymentStatus.TERMINATED
        else:
            return DeploymentStatus.PENDING

    def wait_until_ready(
        self, deployment_id: str, timeout: int = 3600
    ) -> t.Tuple[DeploymentResponse, bool]:
        """Block until the deployment is ready or ``timeout`` seconds elapse.

        Returns:
            The deployment as a dict and ``True`` when the backend wait returned
            0, ``False`` otherwise.
        """
        dep = self._cloud_client.deployment.get(name=deployment_id)
        retcode = dep.wait_until_ready(timeout=timeout)
        return dep.to_dict(), retcode == 0

    def get_endpoint_urls(
        self,
        deployment_id: str,
    ) -> list[str]:
        """Return the URLs attached to ``deployment_id`` (empty list when none)."""
        dep = self.get_deployment(deployment_id)
        latest = self._cloud_client.deployment._client.v2.get_deployment(
            dep["name"], dep["cluster"]
        )
        urls = getattr(latest, "urls", None)
        return urls if urls is not None else []
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import tarfile
import time
import typing as t
from datetime import datetime
import requests
from dynamo.sdk.core.protocol.deployment import (
Deployment,
DeploymentManager,
DeploymentResponse,
DeploymentStatus,
Service,
)
class KubernetesDeploymentManager(DeploymentManager):
    """
    Implementation of DeploymentManager that talks to the dynamo_store deployment API.
    Accepts **kwargs for backend-specific options.
    Handles error reporting and payload construction according to the API schema.
    Raises exceptions for errors; CLI handles user interaction.

    HTTP failures from the deployment endpoints surface as ``RuntimeError``
    whose single argument is a ``(status, message, url)`` tuple.
    """

    # Seconds to sleep between two status polls in ``wait_until_ready``.
    _POLL_INTERVAL_SECONDS = 5

    def __init__(self, endpoint: str):
        """Store the endpoint (trailing slashes stripped) and open an HTTP session."""
        self.endpoint = endpoint.rstrip("/")
        self.session = requests.Session()
        self.namespace = "default"

    @staticmethod
    def _http_error(e: requests.HTTPError, url: str) -> RuntimeError:
        """Build the ``(status, message, url)`` RuntimeError for an HTTP failure."""
        response = e.response
        # ``bool(response)`` is ``response.ok`` and therefore False for every
        # 4xx/5xx reply, so the presence check must compare against None.
        if response is not None:
            return RuntimeError((response.status_code, response.text, url))
        return RuntimeError((None, str(e), url))

    @staticmethod
    def _status_from_response(dep: DeploymentResponse) -> DeploymentStatus:
        """Map the ``status`` field of a deployment response onto ``DeploymentStatus``."""
        status = dep.get("status", "unknown")
        if status == "running":
            return DeploymentStatus.RUNNING
        if status == "failed":
            return DeploymentStatus.FAILED
        if status == "deploying":
            return DeploymentStatus.IN_PROGRESS
        if status == "terminated":
            return DeploymentStatus.TERMINATED
        return DeploymentStatus.PENDING

    def _upload_pipeline(self, pipeline: str, entry_service: Service, **kwargs) -> None:
        """Upload the entire pipeline as a single component/version, with a manifest of all services.

        Args:
            pipeline: Pipeline tag in the form ``name:version``.
            entry_service: Entry service whose ``path`` is the built pipeline
                directory that gets archived and uploaded.
            **kwargs: ``build_at`` (datetime or ISO-8601 string) overrides the
                build timestamp sent when the version is registered.

        Raises:
            ValueError: If ``pipeline`` is not of the form ``name:version``.
            FileNotFoundError: If ``entry_service.path`` is not a directory.
            RuntimeError: If registering the component/version or uploading the
                archive returns an unexpected status code.
        """
        session = self.session
        endpoint = self.endpoint
        pipeline_name, sep, pipeline_version = pipeline.partition(":")
        if not (sep and pipeline_name and pipeline_version):
            raise ValueError(
                f"pipeline must be in the form name:version, got {pipeline!r}"
            )

        # Check if component exists before POST
        comp_url = f"{endpoint}/api/v1/dynamo_components"
        comp_get_url = f"{endpoint}/api/v1/dynamo_components/{pipeline_name}"
        if session.get(comp_get_url).status_code != 200:
            comp_payload = {
                "name": pipeline_name,
                "description": "Registered by Dynamo's KubernetesDeploymentManager",
            }
            resp = session.post(comp_url, json=comp_payload)
            # 409 is accepted alongside 200/201 when registering the component.
            if resp.status_code not in (200, 201, 409):
                raise RuntimeError(f"Failed to create component: {resp.text}")

        # Check if version exists before POST
        ver_url = f"{endpoint}/api/v1/dynamo_components/{pipeline_name}/versions"
        ver_get_url = f"{endpoint}/api/v1/dynamo_components/{pipeline_name}/versions/{pipeline_version}"
        if session.get(ver_get_url).status_code != 200:
            # Naive UTC timestamps are kept so the serialized format is unchanged.
            build_at = kwargs.get("build_at")
            if not build_at:
                build_at = datetime.utcnow()
            if isinstance(build_at, str):
                try:
                    build_at = datetime.fromisoformat(build_at)
                except ValueError:
                    build_at = datetime.utcnow()
            manifest = {
                "service": entry_service.service_name,
                "apis": entry_service.apis,
                "size_bytes": entry_service.size_bytes,
            }
            ver_payload = {
                "name": entry_service.name,
                "description": f"Auto-registered version for {pipeline}",
                "resource_type": "dynamo_component_version",
                "version": entry_service.version,
                "manifest": manifest,
                "build_at": build_at.isoformat(),
            }
            resp = session.post(ver_url, json=ver_payload)
            if resp.status_code not in (200, 201, 409):
                raise RuntimeError(f"Failed to create component version: {resp.text}")

        # Upload the graph
        build_dir = entry_service.path
        if not build_dir or not os.path.isdir(build_dir):
            raise FileNotFoundError(f"Built pipeline directory not found: {build_dir}")
        tar_stream = io.BytesIO()
        with tarfile.open(fileobj=tar_stream, mode="w") as tar:
            tar.add(build_dir, arcname=".")
        tar_stream.seek(0)
        upload_url = f"{endpoint}/api/v1/dynamo_components/{pipeline_name}/versions/{pipeline_version}/upload"
        upload_headers = {"Content-Type": "application/x-tar"}
        resp = session.put(upload_url, data=tar_stream, headers=upload_headers)
        if resp.status_code not in (200, 201, 204):
            raise RuntimeError(f"Failed to upload pipeline artifact: {resp.text}")

    def create_deployment(self, deployment: Deployment, **kwargs) -> DeploymentResponse:
        """Create a new deployment. Ensures all components and versions are registered/uploaded before creating the deployment.

        Args:
            deployment: Deployment description; ``entry_service`` must be set.
            **kwargs: ``dev`` (bool, default ``False``) is sent in the payload;
                all kwargs are also forwarded to ``_upload_pipeline``.

        Returns:
            The JSON body returned by the deployments endpoint.

        Raises:
            ValueError: If ``deployment.entry_service`` is missing.
            RuntimeError: ``(409, msg, None)`` when the response text contains
                "already exists", ``(status, msg, url)`` for other HTTP errors.
        """
        if deployment.entry_service is None:
            # Fail early with a clear message instead of an AttributeError
            # raised from inside the upload.
            raise ValueError("deployment.entry_service is required to create a deployment")

        component = deployment.pipeline or deployment.namespace
        # Upload the pipeline to the API store before creating the deployment
        self._upload_pipeline(
            pipeline=component,
            entry_service=deployment.entry_service,
            **kwargs,
        )

        # Now create the deployment
        payload = {
            "name": deployment.name,
            "component": component,
            "dev": kwargs.get("dev", False),
            "envs": deployment.envs,
        }
        payload = {k: v for k, v in payload.items() if v is not None}
        url = f"{self.endpoint}/api/v2/deployments"
        try:
            resp = self.session.post(url, json=payload)
            resp.raise_for_status()
            return resp.json()
        except requests.HTTPError as e:
            error = self._http_error(e, url)
            msg = error.args[0][1]
            if "already exists" in msg:
                raise RuntimeError((409, msg, None)) from e
            raise error from e

    def update_deployment(
        self, deployment_id: str, deployment: Deployment, **kwargs
    ) -> None:
        """Update an existing deployment.

        Args:
            deployment_id: Identifier used in the request URL.
            deployment: New deployment description.
            **kwargs: ``access_authorization`` (default ``False``) is sent in
                the payload.

        Raises:
            RuntimeError: ``(status, msg, url)`` on an HTTP error.
        """
        payload = {
            "name": deployment.name,
            "component": deployment.pipeline or deployment.namespace,
            "envs": deployment.envs,
            "services": deployment.services,
            "access_authorization": kwargs.get("access_authorization", False),
        }
        payload = {k: v for k, v in payload.items() if v is not None}
        url = f"{self.endpoint}/api/v2/deployments/{deployment_id}"
        try:
            resp = self.session.put(url, json=payload)
            resp.raise_for_status()
        except requests.HTTPError as e:
            raise self._http_error(e, url) from e

    def get_deployment(self, deployment_id: str) -> DeploymentResponse:
        """Get deployment details.

        Raises:
            RuntimeError: ``(status, msg, url)`` on an HTTP error.
        """
        url = f"{self.endpoint}/api/v2/deployments/{deployment_id}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            return resp.json()
        except requests.HTTPError as e:
            raise self._http_error(e, url) from e

    def list_deployments(self) -> t.List[DeploymentResponse]:
        """List all deployments.

        Returns:
            The ``items`` list of the response body, or an empty list when the
            key is absent.

        Raises:
            RuntimeError: ``(status, msg, url)`` on an HTTP error.
        """
        url = f"{self.endpoint}/api/v2/deployments"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
            return data.get("items", [])
        except requests.HTTPError as e:
            raise self._http_error(e, url) from e

    def delete_deployment(self, deployment_id: str) -> None:
        """Delete a deployment.

        Raises:
            RuntimeError: ``(status, msg, url)`` on an HTTP error.
        """
        url = f"{self.endpoint}/api/v2/deployments/{deployment_id}"
        try:
            resp = self.session.delete(url)
            resp.raise_for_status()
        except requests.HTTPError as e:
            raise self._http_error(e, url) from e

    def get_status(
        self,
        deployment_id: str,
    ) -> DeploymentStatus:
        """Fetch the deployment and map its ``status`` field onto ``DeploymentStatus``.

        Any status other than "running", "failed", "deploying" or "terminated"
        is reported as ``PENDING``.
        """
        return self._status_from_response(self.get_deployment(deployment_id))

    def wait_until_ready(
        self, deployment_id: str, timeout: int = 3600
    ) -> t.Tuple[DeploymentResponse, bool]:
        """Poll the deployment until it is running, has failed, or ``timeout`` elapses.

        The deployment is fetched at least once, even when ``timeout`` is zero
        or negative, so a response is always available to return.

        Returns:
            The last fetched deployment response and ``True`` only when its
            status is running.
        """
        deadline = time.monotonic() + timeout
        while True:
            # One request per poll: the status is derived from the same response.
            dep = self.get_deployment(deployment_id)
            status = self._status_from_response(dep)
            if status == DeploymentStatus.RUNNING:
                return dep, True
            if status == DeploymentStatus.FAILED:
                return dep, False
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return dep, False
            time.sleep(min(self._POLL_INTERVAL_SECONDS, remaining))

    def get_endpoint_urls(
        self,
        deployment_id: str,
    ) -> t.List[str]:
        """Return the ``urls`` field of the deployment, or an empty list when absent."""
        dep = self.get_deployment(deployment_id)
        return dep.get("urls", [])
......@@ -77,32 +77,52 @@ class DeploymentStatus(str, Enum):
"""Status of a dynamo deployment."""
PENDING = "pending"
IN_PROGRESS = "in_progress"
IN_PROGRESS = "in progress"
RUNNING = "running"
FAILED = "failed"
TERMINATED = "terminate"
SCALED_TO_ZERO = "scaled_to_zero"
SCALED_TO_ZERO = "scaled to zero"
@property
def color(self) -> str:
return {
DeploymentStatus.RUNNING: "green",
DeploymentStatus.IN_PROGRESS: "yellow",
DeploymentStatus.PENDING: "yellow",
DeploymentStatus.FAILED: "red",
DeploymentStatus.TERMINATED: "red",
DeploymentStatus.SCALED_TO_ZERO: "yellow",
}.get(self, "white")
@dataclass
class ScalingPolicy:
policy: str
parameters: dict[str, t.Union[int, float, str]] = field(default_factory=dict)
parameters: t.Dict[str, t.Union[int, float, str]] = field(default_factory=dict)
@dataclass
class Env:
name: str
value: str = ""
@dataclass
class Service:
"""A single component."""
"""The entry service of a deployment."""
service_name: str
name: str
namespace: str
class_name: str
id: str | None = None
cmd: list[str] = field(default_factory=list)
version: str
path: str
cmd: t.List[str] = field(default_factory=list)
resources: Resources | None = None
environment: dict[str, str] = field(default_factory=dict)
secrets: list[str] = field(default_factory=list)
envs: t.List[Env] = field(default_factory=list)
secrets: t.List[str] = field(default_factory=list)
scaling: ScalingPolicy = field(default_factory=lambda: ScalingPolicy(policy="none"))
apis: dict = field(default_factory=dict)
size_bytes: int = 0
@dataclass
......@@ -111,21 +131,28 @@ class Deployment:
name: str
namespace: str
services: list[Service] = field(default_factory=list)
pipeline: t.Optional[str] = None
entry_service: t.Optional[Service] = None
envs: t.Optional[t.List[dict]] = None
# Type alias for deployment responses (e.g., from backend APIs)
DeploymentResponse = t.Dict[str, t.Any]
class DeploymentManager(ABC):
"""Interface for managing dynamo graph deployments."""
@abstractmethod
def create_deployment(self, deployment: Deployment) -> str:
def create_deployment(self, deployment: Deployment, **kwargs) -> DeploymentResponse:
"""Create new deployment.
Args:
deployment: Deployment configuration
**kwargs: Additional backend-specific arguments
Returns:
The ID of the created deployment
The created deployment
"""
pass
......@@ -140,7 +167,7 @@ class DeploymentManager(ABC):
pass
@abstractmethod
def get_deployment(self, deployment_id: str) -> dict[str, t.Any]:
def get_deployment(self, deployment_id: str) -> DeploymentResponse:
"""Get deployment details.
Args:
......@@ -152,7 +179,7 @@ class DeploymentManager(ABC):
pass
@abstractmethod
def list_deployments(self) -> list[dict[str, t.Any]]:
def list_deployments(self) -> t.List[DeploymentResponse]:
"""List all deployments.
Returns:
......@@ -170,10 +197,13 @@ class DeploymentManager(ABC):
pass
@abstractmethod
def get_status(self, deployment_id: str) -> DeploymentStatus:
def get_status(
self,
deployment_id: str,
) -> DeploymentStatus:
"""Get the current status of a deployment.
Args:
Args (one of):
deployment_id: The ID of the deployment
Returns:
......@@ -182,7 +212,9 @@ class DeploymentManager(ABC):
pass
@abstractmethod
def wait_until_ready(self, deployment_id: str, timeout: int = 3600) -> bool:
def wait_until_ready(
self, deployment_id: str, timeout: int = 3600
) -> t.Tuple[DeploymentResponse, bool]:
"""Wait until a deployment is ready.
Args:
......@@ -190,15 +222,18 @@ class DeploymentManager(ABC):
timeout: Maximum time to wait in seconds
Returns:
True if deployment became ready, False if timed out
Tuple of deployment response and a boolean indicating if the deployment became ready
"""
pass
@abstractmethod
def get_endpoint_urls(self, deployment_id: str) -> list[str]:
def get_endpoint_urls(
self,
deployment_id: str,
) -> t.List[str]:
"""Get the list of endpoint urls attached to a deployment.
Args:
Args (one of):
deployment_id: The ID of the deployment
Returns:
......
......@@ -22,6 +22,8 @@ from typing import Any, Dict, Generic, List, Optional, Set, Tuple, Type, TypeVar
from fastapi import FastAPI
from dynamo.sdk.core.protocol.deployment import Env
T = TypeVar("T", bound=object)
......@@ -74,6 +76,17 @@ class ServiceInterface(Generic[T], ABC):
"""Get the service configuration"""
pass
@property
def dependencies(self) -> Dict[str, "DependencyInterface"]:
"""Get the service dependencies"""
return {}
@property
@abstractmethod
def envs(self) -> List[Env]:
"""Get the service's environment variables"""
return []
@property
@abstractmethod
def inner(self) -> Type[T]:
......@@ -110,20 +123,12 @@ class ServiceInterface(Generic[T], ABC):
"""Inject configuration from environment into service configs"""
pass
@property
# @abstractmethod
def dependencies(self) -> Dict[str, "DependencyInterface"]:
"""Get the service dependencies"""
return {}
# @property
@abstractmethod
def get_service_configs(self) -> Dict[str, ServiceConfig]:
"""Get all services"""
return {}
@property
# @abstractmethod
def service_configs(self) -> List[ServiceConfig]:
"""Get all service configs"""
return []
......
......@@ -23,6 +23,7 @@ from _bentoml_sdk.service.dependency import Dependency as BentoDependency
from fastapi import FastAPI
from dynamo.sdk.core.decorators.endpoint import DynamoClient, DynamoEndpoint
from dynamo.sdk.core.protocol.deployment import Env
from dynamo.sdk.core.protocol.interface import (
DependencyInterface,
DeploymentTarget,
......@@ -154,6 +155,10 @@ class BentoServiceAdapter(ServiceMixin, ServiceInterface[T]):
def inner(self) -> Type[T]:
return self._bentoml_service.inner
@property
def envs(self) -> List[Env]:
return self._bentoml_service.envs
def get_endpoints(self) -> Dict[str, DynamoEndpointInterface]:
return self._endpoints
......
......@@ -29,6 +29,7 @@ from circus.watcher import Watcher
from fastapi import FastAPI
from dynamo.sdk.core.decorators.endpoint import DynamoClient, DynamoEndpoint
from dynamo.sdk.core.protocol.deployment import Env
from dynamo.sdk.core.protocol.interface import (
DependencyInterface,
DeploymentTarget,
......@@ -115,6 +116,10 @@ class LocalService(ServiceMixin, ServiceInterface[T]):
def config(self) -> ServiceConfig:
return self._config
@property
def envs(self) -> List[Env]:
return self._config.get("envs", [])
@property
def inner(self) -> Type[T]:
return self._inner_cls
......
......@@ -23,6 +23,9 @@ import os
import sys
from typing import Optional, TypeVar
import yaml
from dynamo.sdk.core.protocol.deployment import Service
from dynamo.sdk.lib.service import DynamoService
logger = logging.getLogger(__name__)
......@@ -191,3 +194,62 @@ def _do_import(import_str: str, working_dir: str) -> DynamoService:
object.__setattr__(instance, "_import_str", import_str_val)
return instance
def _get_dir_size(path: str) -> int:
    """Return the summed size in bytes of every file found while walking *path*.

    Only entries for which ``os.path.isfile`` is true are counted. The total is
    also written to the module logger at INFO level.
    """
    candidates = (
        os.path.join(root, filename)
        for root, _subdirs, filenames in os.walk(path)
        for filename in filenames
    )
    total = sum(
        os.path.getsize(file_path)
        for file_path in candidates
        if os.path.isfile(file_path)
    )
    logger.info(f"Total size of {path}: {total} bytes")
    return total
def load_entry_service(
    pipeline_tag: str, build_dir: str = "~/bentoml/bentos"
) -> Service:
    """
    Given a built pipeline tag (e.g. frontend:2uk2fwzvqsswvs7t), load the entry service as a deployment Service instance.

    Args:
        pipeline_tag: Tag in the form ``name:version``; both parts must be non-empty.
        build_dir: Root directory containing ``<name>/<version>`` build
            directories. ``~`` is expanded.

    Returns:
        A ``Service`` describing the entry listed under ``entry_service`` in the
        build's ``bento.yaml``, with ``size_bytes`` set to the size of the
        build directory.

    Raises:
        ValueError: If the tag is malformed, ``bento.yaml`` does not contain a
            mapping, or no service matches ``entry_service``.
        FileNotFoundError: If the build directory or its ``bento.yaml`` is missing.

    Side effects:
        Prepends ``<build directory>/src`` to ``sys.path`` when not already present.
    """
    name, sep, version = pipeline_tag.partition(":")
    # Reject "name", "name:" and ":version"; an empty part would otherwise
    # resolve to a parent directory and report a misleading missing-file error.
    if not (sep and name and version):
        raise ValueError("pipeline_tag must be in the form name:version")

    graph_dir = os.path.expanduser(f"{build_dir}/{name}/{version}")
    if not os.path.isdir(graph_dir):
        raise FileNotFoundError(f"Pipeline directory not found: {graph_dir}")

    config_path = os.path.join(graph_dir, "bento.yaml")
    if not os.path.isfile(config_path):
        raise FileNotFoundError(
            f"Pipeline config (bento.yaml) not found in {graph_dir}"
        )

    with open(config_path, encoding="utf-8") as f:
        graph_cfg = yaml.safe_load(f)
    # safe_load yields None for an empty document and may yield a non-mapping.
    if not isinstance(graph_cfg, dict):
        raise ValueError(f"Pipeline config is not a mapping: {config_path}")

    # Add src_dir to sys.path if needed
    src_dir = os.path.join(graph_dir, "src")
    if src_dir not in sys.path:
        sys.path.insert(0, src_dir)

    entry_name = graph_cfg.get("entry_service")
    entry = next(
        (svc for svc in graph_cfg.get("services") or [] if svc["name"] == entry_name),
        None,
    )
    if entry is None:
        raise ValueError("No entry service found in the pipeline")

    # Walk the build directory only once an entry service is known to exist.
    return Service(
        service_name=graph_cfg.get("service"),
        name=entry["name"],
        namespace="default",
        version=version,
        path=graph_dir,
        envs=graph_cfg.get("envs", []),
        apis={},
        size_bytes=_get_dir_size(graph_dir),
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment