Commit ccfe101e authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

build: pip based installation of icp and runtime. Also make tritonserver optional


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent 5701753a
......@@ -154,8 +154,20 @@ COPY . /workspace
RUN cd runtime/rust && cargo build --release --locked && cargo doc --no-deps
RUN /workspace/icp/protos/gen_python.sh
# Install python packages
ARG PYTHON_PACKAGE_VERSION=0.0.1.dev+unknown
# SETUPTOOLS_SCM_PRETEND_VERSION allows dynamically setting the package versions during build/install.
# This allows having versioned packages during development between releases, such as commit IDs.
#
# Normally SCM version is taken directly from .git but this is not available in the Dockerfile
# and so we pass in via a buildarg
RUN SETUPTOOLS_SCM_PRETEND_VERSION_FOR_TRITON_DISTRIBUTED_ICP=${PYTHON_PACKAGE_VERSION} pip install -e /workspace/icp/python
RUN SETUPTOOLS_SCM_PRETEND_VERSION_FOR_TRITON_DISTRIBUTED_RUNTIME=${PYTHON_PACKAGE_VERSION} pip install -e /workspace/runtime/python
# Sets pythonpath for python modules
ENV PYTHONPATH="${PYTHONPATH}:/workspace/icp/python/src:/workspace/runtime/python/src:/workspace/examples/python:/opt/tritonserver/python/openai/openai_frontend"
ENV PYTHONPATH="${PYTHONPATH}:/workspace/examples/python:/opt/tritonserver/python/openai/openai_frontend"
# Enable system UCX
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
......
......@@ -22,12 +22,16 @@ PLATFORM=linux/amd64
# Get short commit hash
commit_id=$(git rev-parse --short HEAD)
# Attempt to get current tag
current_tag=$(git describe --tags --exact-match 2>/dev/null) || true
# if COMMIT_ID matches a TAG use that
current_tag=$(git describe --tags --exact-match 2>/dev/null | sed 's/^v//') || true
# Use tag if available, otherwise use commit hash
VERSION=${current_tag:-$commit_id}
# Get latest TAG and add COMMIT_ID for dev
latest_tag=$(git describe --tags --abbrev=0 $(git rev-list --tags --max-count=1 main) | sed 's/^v//') || true
# Use tag if available, otherwise use latest_tag.dev.commit_id
VERSION=v${current_tag:-$latest_tag.dev.$commit_id}
PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
# Frameworks
#
......@@ -244,7 +248,7 @@ get_options "$@"
# BUILD DEV IMAGE
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG --build-arg FRAMEWORK=$FRAMEWORK --build-arg ${FRAMEWORK}_FRAMEWORK=1"
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG --build-arg FRAMEWORK=$FRAMEWORK --build-arg ${FRAMEWORK}_FRAMEWORK=1 --build-arg VERSION=$VERSION --build-arg PYTHON_PACKAGE_VERSION=$PYTHON_PACKAGE_VERSION"
if [ ! -z ${GITHUB_TOKEN} ]; then
BUILD_ARGS+=" --build-arg GITHUB_TOKEN=${GITHUB_TOKEN} "
......
......@@ -8,12 +8,12 @@ class EncodeDecodeOperator(Operator):
self,
name,
version,
triton_core,
request_plane,
data_plane,
parameters,
repository,
logger,
triton_core,
):
self._encoder = RemoteOperator("encoder", request_plane, data_plane)
self._decoder = RemoteOperator("decoder", request_plane, data_plane)
......
......@@ -41,12 +41,12 @@ class EncodeDecodeOperator(Operator):
self,
name,
version,
triton_core,
request_plane,
data_plane,
parameters,
repository,
logger,
triton_core,
):
self._encoder = RemoteOperator("encoder", request_plane, data_plane)
self._decoder = RemoteOperator("decoder", request_plane, data_plane)
......
......@@ -2,7 +2,7 @@ import argparse
import json
import logging
from dataclasses import field
from typing import AsyncGenerator, List, Optional
from typing import Any, AsyncGenerator, List, Optional
import numpy as np
......@@ -23,7 +23,6 @@ class VllmOperator(Operator):
self,
name: str,
version: int,
triton_core,
request_plane: RequestPlane,
data_plane: DataPlane,
parameters: Optional[dict[str, str | int | bool | bytes]] = field(
......@@ -31,6 +30,7 @@ class VllmOperator(Operator):
),
repository: Optional[str] = None,
logger: Optional[logging.Logger] = None,
triton_core: Optional[Any] = None,
):
self.name = name
self.version = version
......
[build-system]
requires = ["setuptools>=65.0", "setuptools-scm>=8"]
build-backend = "setuptools.build_meta"
[project]
name = "triton_distributed.icp"
dynamic = ["version"]
[tool.setuptools_scm]
version_file = "src/triton_distributed/icp/_version.py"
root = "../.."
[tool.setuptools.packages.find]
where = ["src"]
include = ["triton_distributed.icp*"]
namespaces = true
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Type
class CustomKeyErrorDict(dict):
    """Dict that raises a domain-specific exception instead of KeyError.

    Used for the conversion tables in this package so that a missing entry
    surfaces as e.g. ``ValueError("Unsupported X. Can't convert k to Y")``
    rather than a bare ``KeyError``.
    """

    def __init__(
        self,
        from_name: str,
        to_name: str,
        *args,
        exception: Type[Exception] = ValueError,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        self._from_name = from_name
        self._to_name = to_name
        self._exception = exception

    def __getitem__(self, key):
        if key in self:
            return super().__getitem__(key)
        # ``from None`` keeps the traceback free of the internal lookup detail.
        raise self._exception(
            f"Unsupported {self._from_name}. Can't convert {key} to {self._to_name}"
        ) from None
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
# This file contains the DLPack API wrapped in Python style (see
# 'dlpack.h' for detail) and the utilities for Triton client to interact
# with DLPack
#
# Ref:
# https://github.com/dmlc/dlpack/blob/main/include/dlpack/dlpack.h
# https://github.com/dmlc/dlpack/blob/main/apps/numpy_dlpack/dlpack/from_numpy.py
################################################################################
import ctypes
from typing import Union
from triton_distributed.icp._custom_key_error_dict import CustomKeyErrorDict
from triton_distributed.icp.data_type import DataType
from triton_distributed.icp.memory_type import MemoryType, string_to_memory_type
try:
import cupy
except ImportError:
cupy = None
# Need to explicit set the res / arg types for pythonapi functions to
# work properly (ctypes defaults every result/argument to c_int, which
# truncates 64-bit pointers).
ctypes.pythonapi.PyMem_RawMalloc.restype = ctypes.c_void_p
ctypes.pythonapi.PyMem_RawFree.argtypes = [ctypes.c_void_p]
ctypes.pythonapi.PyCapsule_New.restype = ctypes.py_object
ctypes.pythonapi.PyCapsule_New.argtypes = [
    ctypes.c_void_p,
    ctypes.c_char_p,
    ctypes.c_void_p,
]
ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_char_p]

# Capsule name used by the DLPack protocol for an unconsumed tensor capsule.
c_str_dltensor = b"dltensor"
# Device types from dlpack.h (DLDeviceType enum).
# NOTE: the kDL* members are plain int class attributes, not instances of
# this ctypes type.
class DLDeviceType(ctypes.c_int):
    kDLCPU = 1
    kDLCUDA = 2
    kDLCUDAHost = 3
    kDLOpenCL = 4
    kDLVulkan = 7
    kDLMetal = 8
    kDLVPI = 9
    kDLROCM = 10
    kDLROCMHost = 11
    kDLExtDev = 12
    kDLCUDAManaged = 13
    kDLOneAPI = 14
    kDLWebGPU = 15
    kDLHexagon = 16
# Accepted caller-facing ways to name a device / memory location; normalized
# by parse_device_or_memory_type() below.
DeviceOrMemoryType = Union[
    tuple[MemoryType, int], MemoryType, tuple[DLDeviceType, int], str
]


# The ctypes structures below mirror the C definitions in dlpack.h.


class DLDevice(ctypes.Structure):
    _fields_ = [
        ("device_type", ctypes.c_int),
        ("device_id", ctypes.c_int),
    ]


# Data type categories from dlpack.h (DLDataTypeCode enum); the kDL* members
# are plain int class attributes.
class DLDataTypeCode(ctypes.c_uint8):
    kDLInt = 0
    kDLUInt = 1
    kDLFloat = 2
    kDLOpaquePointer = 3
    kDLBfloat = 4
    kDLComplex = 5
    kDLBool = 6


class DLDataType(ctypes.Structure):
    _fields_ = [
        ("type_code", ctypes.c_uint8),
        ("bits", ctypes.c_uint8),
        ("lanes", ctypes.c_uint16),
    ]


class DLTensor(ctypes.Structure):
    _fields_ = [
        ("data", ctypes.c_void_p),
        ("device", DLDevice),
        ("ndim", ctypes.c_int),
        ("dtype", DLDataType),
        ("shape", ctypes.POINTER(ctypes.c_int64)),
        # NULL strides mean compact row-major data (see is_contiguous_data).
        ("strides", ctypes.POINTER(ctypes.c_int64)),
        ("byte_offset", ctypes.c_uint64),
    ]


class DLManagedTensor(ctypes.Structure):
    _fields_ = [
        ("dl_tensor", DLTensor),
        # Opaque pointer owned by the producer; passed back to 'deleter'.
        ("manager_ctx", ctypes.c_void_p),
        ("deleter", ctypes.CFUNCTYPE(None, ctypes.c_void_p)),
    ]
# Utilities


def _raise_error(msg):
    """
    Raise error with the provided message
    """
    # NOTE(review): raises the base Exception type; DLPackObject.__init__
    # catches Exception broadly, so a dedicated error class may be clearer.
    raise Exception(msg) from None
# Use as managed context in DLPack that doesn't hold ownership of the
# data content.
class DataViewContext:
    """Manager context for exporting a view over externally-owned memory.

    Keeps the ctypes shape/stride arrays alive for as long as the exported
    DLManagedTensor references them.
    """

    def __init__(self, shape) -> None:
        # Convert the Python object to ctypes objects expected by
        # DLPack
        self._shape = (ctypes.c_int64 * len(shape))(*shape)
        # No strides: compact and row-major
        self._strides = ctypes.POINTER(ctypes.c_int64)()

    def as_manager_ctx(self) -> ctypes.c_void_p:
        """Return a void* suitable for DLManagedTensor.manager_ctx.

        Takes one reference on this object and one on the pointer wrapper;
        managed_tensor_deleter releases both.
        """
        py_obj = ctypes.py_object(self)
        py_obj_ptr = ctypes.pointer(py_obj)
        ctypes.pythonapi.Py_IncRef(py_obj)
        ctypes.pythonapi.Py_IncRef(ctypes.py_object(py_obj_ptr))
        return ctypes.cast(py_obj_ptr, ctypes.c_void_p)
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
def managed_tensor_deleter(handle: ctypes.c_void_p) -> None:
    """DLManagedTensor.deleter callback.

    Releases the two references taken in DataViewContext.as_manager_ctx and
    frees the raw allocation holding the DLManagedTensor struct itself.
    """
    dl_managed_tensor = DLManagedTensor.from_address(handle)  # type: ignore
    py_obj_ptr = ctypes.cast(
        dl_managed_tensor.manager_ctx, ctypes.POINTER(ctypes.py_object)
    )
    py_obj = py_obj_ptr.contents
    # Mirrors the two Py_IncRef calls in DataViewContext.as_manager_ctx.
    ctypes.pythonapi.Py_DecRef(py_obj)
    ctypes.pythonapi.Py_DecRef(ctypes.py_object(py_obj_ptr))
    # NOTE(review): assumes the struct was allocated with PyMem_RawMalloc
    # (its restype is configured above); the allocation site is not in view.
    ctypes.pythonapi.PyMem_RawFree(handle)
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
def pycapsule_deleter(handle: ctypes.c_void_p) -> None:
    """Capsule destructor: free the tensor only if nobody consumed the capsule.

    Per the DLPack convention a consumer renames the capsule, so a capsule
    still named "dltensor" means ownership was never transferred.
    """
    pycapsule: ctypes.py_object = ctypes.cast(handle, ctypes.py_object)
    if ctypes.pythonapi.PyCapsule_IsValid(pycapsule, c_str_dltensor):
        dl_managed_tensor = ctypes.pythonapi.PyCapsule_GetPointer(
            pycapsule, c_str_dltensor
        )
        managed_tensor_deleter(dl_managed_tensor)
        # NOTE(review): restype/argtypes are configured above only for
        # PyCapsule_New/GetPointer, not IsValid/SetDestructor — confirm the
        # ctypes defaults are safe for these two calls.
        ctypes.pythonapi.PyCapsule_SetDestructor(pycapsule, None)
def is_contiguous_data(
    ndim: ctypes.c_int,
    shape: ctypes.POINTER(ctypes.c_int64),  # type: ignore
    stride: ctypes.POINTER(ctypes.c_int64),  # type: ignore
):
    """Return True when the strides describe compact row-major data.

    A missing or NULL stride pointer means the data is compact row-major
    by DLPack convention.
    """
    # NULL / absent strides => contiguous by definition.
    if stride is None or not bool(stride):
        return True
    # Walk axes from innermost to outermost, accumulating the stride a
    # compact row-major layout would have.
    expected_stride = 1
    for axis in range(ndim - 1, -1, -1):  # type: ignore
        if stride[axis] != expected_stride:
            return False
        expected_stride *= shape[axis]
    return True
def get_byte_size(
    dtype: DLDataType, ndim: ctypes.c_int, shape: ctypes.POINTER(ctypes.c_int64)  # type: ignore
):
    """Total tensor size in bytes: per-element byte size times element count.

    Assumes 8 bits per byte; sub-byte types truncate via integer division,
    matching the original behavior.
    """
    total_bytes = dtype.bits * dtype.lanes // 8
    for axis in range(ndim):  # type: ignore
        total_bytes *= shape[axis]
    return total_bytes
def get_dlpack_capsule(dlpack_obj, stream=None):
    """Extract the DLPack PyCapsule from *dlpack_obj*.

    Objects implementing the modern protocol expose ``__dlpack__`` (and must
    also expose ``__dlpack_device__``); legacy producers hand the capsule
    over directly.

    Parameters
    ----------
    dlpack_obj : Any
        A DLPack-capable object, or a raw PyCapsule (legacy interface).
    stream : Optional[int]
        Stream to pass to ``__dlpack__`` for CUDA tensors.
    """
    # Extract PyCapsule of the DLPack object
    if hasattr(dlpack_obj, "__dlpack__"):
        if not hasattr(dlpack_obj, "__dlpack_device__"):
            _raise_error(
                "DLPack expects '__dlpack_device__' if '__dlpack__' has been defined"
            )
        # BUGFIX: __dlpack_device__ returns a (device_type, device_id) pair
        # (see DLPackObject.__init__). The previous code compared the whole
        # tuple against the int kDLCUDA, which is always unequal, so the
        # stream was never forwarded for CUDA tensors.
        device_type, _device_id = dlpack_obj.__dlpack_device__()
        # Have to condition on the device type as, using numpy as example,
        # some DLPack implementation doesn't accept 'stream' as arguments
        if device_type != DLDeviceType.kDLCUDA:
            return dlpack_obj.__dlpack__()
        else:
            return dlpack_obj.__dlpack__(stream)
    else:
        # Old interface where PyCapsule object is passed directly
        return dlpack_obj
def get_dlpack_device(dlpack_obj):
    """Return the object's (device_type, device_id) pair, or None for
    producers that do not implement ``__dlpack_device__``."""
    device_getter = getattr(dlpack_obj, "__dlpack_device__", None)
    return device_getter() if device_getter is not None else None
def get_managed_tensor(dlcapsule):
    """Dereference a "dltensor" capsule into its DLManagedTensor struct."""
    ptr = ctypes.pythonapi.PyCapsule_GetPointer(dlcapsule, c_str_dltensor)
    return DLManagedTensor.from_address(ptr)
class DLPackObject:
    """Parsed, read-only view over any object implementing the DLPack protocol.

    Holds the producer's capsule plus the underlying DLTensor and exposes the
    tensor's metadata (shape, dtype, memory type, ...) as properties.
    """

    def __init__(self, value) -> None:
        try:
            stream = None
            # __dlpack_device__ returns a (device_type, device_id) pair.
            device, device_id = value.__dlpack_device__()
            if device == DLDeviceType.kDLCUDA:
                if cupy is None:
                    # Without cupy we cannot activate the CUDA device.
                    raise ValueError(
                        f"DLPack synchronization on device {device,device_id} not supported"
                    )
                with cupy.cuda.Device(device_id):
                    stream = 1  # legacy default stream
                    self._capsule = get_dlpack_capsule(value, stream)
                    self._tensor = get_managed_tensor(self._capsule).dl_tensor
            else:
                self._capsule = get_dlpack_capsule(value)
                self._tensor = get_managed_tensor(self._capsule).dl_tensor
        except Exception as e:
            # Any failure (missing protocol, producer error) surfaces uniformly.
            raise ValueError(f"Object does not support DLPack protocol: {e}") from None

    def __eq__(self, other) -> bool:
        """Metadata equality: same size, location, shape, pointer, layout, type."""
        if not isinstance(other, DLPackObject):
            return False
        if self.byte_size != other.byte_size:
            return False
        if self.memory_type != other.memory_type:
            return False
        if self.memory_type_id != other.memory_type_id:
            return False
        if self.shape != other.shape:
            return False
        if self.data_ptr != other.data_ptr:
            return False
        if self.contiguous != other.contiguous:
            return False
        if self.data_type != other.data_type:
            return False
        return True

    @property
    def byte_size(self) -> int:
        # Total tensor size in bytes.
        return get_byte_size(self._tensor.dtype, self._tensor.ndim, self._tensor.shape)

    @property
    def memory_type(self) -> MemoryType:
        # Raises ValueError for device types other than CPU / CUDA.
        return DLPACK_DEVICE_TYPE_TO_MEMORY_TYPE[self._tensor.device.device_type]

    @property
    def memory_type_id(self) -> int:
        # Device ordinal (e.g. the CUDA device index).
        return self._tensor.device.device_id

    @property
    def shape(self) -> list[int]:
        return [self._tensor.shape[i] for i in range(self._tensor.ndim)]

    @property
    def data_type(self) -> DataType:
        # Raises ValueError for unsupported (type_code, bits) combinations.
        return DLPACK_TO_DATA_TYPE[self.dlpack_data_type]

    @property
    def dlpack_data_type(self) -> tuple[DLDataTypeCode, int]:
        # ctypes struct fields read back as plain ints, so this is
        # effectively an (int, int) pair — matching the keys of
        # DLPACK_TO_DATA_TYPE.
        return (self._tensor.dtype.type_code, self._tensor.dtype.bits)

    @property
    def data_ptr(self) -> int:
        # Start address of the data: base pointer plus byte offset.
        # NOTE(review): a NULL base pointer reads back as None here, making
        # this None + int -> TypeError — confirm producers never export NULL.
        return self._tensor.data + self._tensor.byte_offset

    @property
    def contiguous(self) -> bool:
        # True when strides are absent or describe compact row-major data.
        return is_contiguous_data(
            self._tensor.ndim, self._tensor.shape, self._tensor.strides
        )
# DLPack device type (int constant) -> MemoryType. Only CPU and CUDA are
# representable; any other device type raises ValueError via
# CustomKeyErrorDict.
DLPACK_DEVICE_TYPE_TO_MEMORY_TYPE: dict[DLDeviceType, MemoryType] = CustomKeyErrorDict(
    "DLPack device type",
    "Memory type",
    {
        DLDeviceType.kDLCUDA: MemoryType.GPU,
        DLDeviceType.kDLCPU: MemoryType.CPU,
    },
)

# Inverse mapping. CPU_PINNED has no DLPack equivalent and is exported as
# plain kDLCPU.
MEMORY_TYPE_TO_DLPACK_DEVICE_TYPE: dict[MemoryType, DLDeviceType] = CustomKeyErrorDict(
    "Memory type",
    "DLPack device type",
    {
        **{value: key for key, value in DLPACK_DEVICE_TYPE_TO_MEMORY_TYPE.items()},
        **{MemoryType.CPU_PINNED: DLDeviceType.kDLCPU},
    },
)
def parse_device_or_memory_type(
    device_or_memory_type: DeviceOrMemoryType,
) -> tuple[MemoryType, int]:
    """Normalize the accepted device specifications to (memory_type, id).

    Accepts:
      * ``(MemoryType, id)`` tuples,
      * ``(DLPack device type, id)`` tuples — the kDL* constants on
        DLDeviceType are plain ints, so ints are accepted here,
      * a bare ``MemoryType`` (id defaults to 0),
      * strings such as ``"cpu"`` or ``"gpu:1"``.

    Raises
    ------
    ValueError
        For any other argument, an unknown device type, or a malformed
        string.
    """
    memory_type = None
    memory_type_id = 0
    if isinstance(device_or_memory_type, tuple):
        if isinstance(device_or_memory_type[0], MemoryType):
            memory_type = device_or_memory_type[0]
            memory_type_id = device_or_memory_type[1]
        elif isinstance(device_or_memory_type[0], (DLDeviceType, int)):
            # BUGFIX: DLDeviceType.kDLCUDA etc. are plain int class
            # attributes, not DLDeviceType instances, so the previous
            # isinstance(..., DLDeviceType) check rejected the documented
            # tuple[DLDeviceType, int] form. Accept ints, and unwrap a real
            # ctypes instance to its .value so the int-keyed lookup works.
            device_type = device_or_memory_type[0]
            if isinstance(device_type, DLDeviceType):
                device_type = device_type.value
            memory_type = DLPACK_DEVICE_TYPE_TO_MEMORY_TYPE[device_type]
            memory_type_id = device_or_memory_type[1]
        else:
            raise ValueError(f"Invalid memory type {device_or_memory_type}")
    elif isinstance(device_or_memory_type, MemoryType):
        memory_type = device_or_memory_type
        memory_type_id = 0
    elif isinstance(device_or_memory_type, str):
        # "type" or "type:id", e.g. "gpu:1".
        memory_str_tuple = device_or_memory_type.split(":")
        if len(memory_str_tuple) > 2:
            raise ValueError(f"Invalid memory type string {device_or_memory_type}")
        memory_type = string_to_memory_type(memory_str_tuple[0].upper())
        if len(memory_str_tuple) == 2:
            try:
                memory_type_id = int(memory_str_tuple[1])
            except ValueError:
                raise ValueError(
                    f"Invalid memory type string {device_or_memory_type}"
                ) from None
        else:
            memory_type_id = 0
    else:
        # BUGFIX: previously an unrecognized argument fell through and
        # returned (None, 0), violating the declared return type.
        raise ValueError(f"Invalid memory type {device_or_memory_type}")
    return (memory_type, memory_type_id)
# (DLDataTypeCode, bits) -> DataType. Combinations not listed here (e.g.
# complex types, sub-byte widths) raise ValueError via CustomKeyErrorDict.
DLPACK_TO_DATA_TYPE: dict[tuple[DLDataTypeCode, int], DataType] = CustomKeyErrorDict(
    "DLPack data type",
    "Data type",
    {
        (DLDataTypeCode.kDLBool, 8): DataType.BOOL,
        (DLDataTypeCode.kDLInt, 8): DataType.INT8,
        (DLDataTypeCode.kDLInt, 16): DataType.INT16,
        (DLDataTypeCode.kDLInt, 32): DataType.INT32,
        (DLDataTypeCode.kDLInt, 64): DataType.INT64,
        (DLDataTypeCode.kDLUInt, 8): DataType.UINT8,
        (DLDataTypeCode.kDLUInt, 16): DataType.UINT16,
        (DLDataTypeCode.kDLUInt, 32): DataType.UINT32,
        (DLDataTypeCode.kDLUInt, 64): DataType.UINT64,
        (DLDataTypeCode.kDLFloat, 16): DataType.FP16,
        (DLDataTypeCode.kDLFloat, 32): DataType.FP32,
        (DLDataTypeCode.kDLFloat, 64): DataType.FP64,
        (DLDataTypeCode.kDLBfloat, 16): DataType.BF16,
    },
)

# DataType -> DLDataType struct (lanes fixed to 1). Derived from the table
# above, so the two stay in sync; DataType.BYTES and INVALID have no DLPack
# representation and raise ValueError.
DATA_TYPE_TO_DLPACK_DTYPE: dict[DataType, DLDataType] = CustomKeyErrorDict(
    "Data type",
    "DLPack data type",
    {
        value: DLDataType(type_code=key[0], bits=key[1], lanes=1)
        for key, value in DLPACK_TO_DATA_TYPE.items()
    },
)
......@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Abstract Class for interacting with Triton Inference Serving Platform Inter-Component Protocol Data Plane"""
"""Abstract Class for interacting with Triton Distributed Inter-Component Protocol Data Plane"""
import abc
import uuid
......@@ -21,28 +21,16 @@ from typing import Optional, Sequence
import cupy
import numpy
from tritonserver import (
from triton_distributed.icp.data_type import (
DATA_TYPE_TO_NUMPY_DTYPE,
DataType,
InvalidArgumentError,
MemoryBuffer,
MemoryType,
Tensor,
)
from tritonserver._api._datautils import (
STRING_TO_TRITON_MEMORY_TYPE,
TRITON_TO_NUMPY_DTYPE,
)
from tritonserver._c.triton_bindings import (
TRITONSERVER_DataTypeString as DataTypeString,
)
from tritonserver._c.triton_bindings import (
TRITONSERVER_MemoryTypeString as MemoryTypeString,
string_to_data_type,
)
from tritonserver._c.triton_bindings import (
TRITONSERVER_StringToDataType as StringToDataType,
)
from triton_distributed.icp.memory_buffer import MemoryBuffer
from triton_distributed.icp.memory_type import MemoryType, string_to_memory_type
from triton_distributed.icp.protos.icp_pb2 import ModelInferRequest, ModelInferResponse
from triton_distributed.icp.tensor import Tensor
class DataPlaneError(Exception):
......@@ -73,13 +61,13 @@ def set_icp_data_type(
message: ModelInferRequest.InferInputTensor | ModelInferResponse.InferOutputTensor,
value: DataType,
) -> None:
message.datatype = DataTypeString(value)
message.datatype = value.name
def get_icp_data_type(
message: ModelInferRequest.InferInputTensor | ModelInferResponse.InferOutputTensor,
) -> DataType:
return StringToDataType(message.datatype)
return string_to_data_type(message.datatype)
def set_icp_tensor_uri(
......@@ -116,7 +104,7 @@ def set_icp_memory_type(
message: ModelInferRequest.InferInputTensor | ModelInferResponse.InferOutputTensor,
value: MemoryType,
) -> None:
message.parameters[ICP_MEMORY_TYPE].string_param = MemoryTypeString(value)
message.parameters[ICP_MEMORY_TYPE].string_param = value.name
def get_icp_memory_type(
......@@ -124,9 +112,7 @@ def get_icp_memory_type(
) -> MemoryType | None:
if ICP_MEMORY_TYPE not in message.parameters:
return None
return STRING_TO_TRITON_MEMORY_TYPE[
message.parameters[ICP_MEMORY_TYPE].string_param
]
return string_to_memory_type(message.parameters[ICP_MEMORY_TYPE].string_param)
def set_icp_memory_type_id(
......@@ -163,9 +149,7 @@ def set_icp_tensor_contents(
with cupy.cuda.Device(tensor.memory_buffer.memory_type_id):
array = cupy.from_dlpack(tensor)
else:
raise InvalidArgumentError(
f"Invalid Tensor Memory Type {tensor.memory_type}"
)
raise ValueError(f"Invalid Tensor Memory Type {tensor.memory_type}")
message.contents.bytes_contents.append(array.tobytes())
......@@ -193,7 +177,7 @@ def get_icp_tensor_contents(
array = numpy.array(
numpy.frombuffer(
message.contents.bytes_contents[0],
dtype=TRITON_TO_NUMPY_DTYPE[datatype],
dtype=DATA_TYPE_TO_NUMPY_DTYPE[datatype],
)
)
tensor = Tensor(datatype, shape, MemoryBuffer.from_dlpack(array))
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from enum import IntEnum
import numpy
from triton_distributed.icp._custom_key_error_dict import CustomKeyErrorDict
class DataType(IntEnum):
    """Tensor element types used on the ICP wire format.

    The member names double as the wire-level strings (see
    string_to_data_type); the integer values start at 0 with INVALID.
    """

    INVALID = 0
    BOOL = 1
    UINT8 = 2
    UINT16 = 3
    UINT32 = 4
    UINT64 = 5
    INT8 = 6
    INT16 = 7
    INT32 = 8
    INT64 = 9
    FP16 = 10
    FP32 = 11
    FP64 = 12
    BYTES = 13
    BF16 = 14


def string_to_data_type(data_type_string: str) -> DataType:
    """Convert a data-type name (e.g. "FP32") into a DataType member.

    Raises ValueError for names that are not DataType members.
    """
    member = DataType.__members__.get(data_type_string)
    if member is None:
        raise ValueError(
            f"Unsupported Data Type String. Can't convert {data_type_string} to DataType"
        )
    return member
# numpy scalar type -> DataType. Strings and arbitrary objects travel as
# BYTES; unknown dtypes raise ValueError via CustomKeyErrorDict.
NUMPY_TO_DATA_TYPE: dict[type, DataType] = CustomKeyErrorDict(
    "Numpy dtype",
    "Data type",
    {
        bool: DataType.BOOL,
        numpy.bool_: DataType.BOOL,
        numpy.int8: DataType.INT8,
        numpy.int16: DataType.INT16,
        numpy.int32: DataType.INT32,
        numpy.int64: DataType.INT64,
        numpy.uint8: DataType.UINT8,
        numpy.uint16: DataType.UINT16,
        numpy.uint32: DataType.UINT32,
        numpy.uint64: DataType.UINT64,
        numpy.float16: DataType.FP16,
        numpy.float32: DataType.FP32,
        numpy.float64: DataType.FP64,
        numpy.bytes_: DataType.BYTES,
        numpy.str_: DataType.BYTES,
        numpy.object_: DataType.BYTES,
    },
)

# Inverse mapping. The reversed comprehension keeps only the LAST numpy type
# inserted for each DataType, so the many-to-one entries (BOOL, BYTES) are
# pinned explicitly to the canonical numpy type.
DATA_TYPE_TO_NUMPY_DTYPE: dict[DataType, type] = CustomKeyErrorDict(
    "Data type",
    "Numpy dtype",
    {
        **{value: key for key, value in NUMPY_TO_DATA_TYPE.items()},
        **{DataType.BYTES: numpy.object_},
        **{DataType.BOOL: numpy.bool_},
    },
)
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
from triton_distributed.icp._dlpack import DLPackObject
from triton_distributed.icp.memory_type import MemoryType
@dataclass
class MemoryBuffer:
    """A non-owning handle to the memory backing a Tensor.

    The buffer never frees the memory itself; it keeps a reference to
    ``owner`` so the real allocator cannot release the region while this
    handle is alive.

    Parameters
    ----------
    data_ptr : int
        Address of the first byte of the region.
    memory_type : MemoryType
        Kind of memory the region lives in.
    memory_type_id : int
        Memory type id (typically the same as the device id).
    size : int
        Region size in bytes.
    owner : Any
        Object that owns or manages the allocation; it must stay alive for
        the lifetime of this buffer.

    Examples
    --------
    >>> buffer = MemoryBuffer.from_dlpack(numpy.array([100],dtype=numpy.uint8))
    """

    data_ptr: int
    memory_type: MemoryType
    memory_type_id: int
    size: int
    owner: Any

    @staticmethod
    def from_dlpack(owner: Any) -> MemoryBuffer:
        """Build a buffer over any DLPack-capable object, kept alive as owner."""
        if not hasattr(owner, "__dlpack__"):
            raise ValueError("Object does not support DLpack protocol")
        return MemoryBuffer._from_dlpack_object(owner, DLPackObject(owner))

    @staticmethod
    def _from_dlpack_object(owner: Any, dlpack_object: DLPackObject) -> MemoryBuffer:
        """Translate an already-parsed DLPackObject into a MemoryBuffer."""
        # Strided views cannot be described by (pointer, size) alone.
        if not dlpack_object.contiguous:
            raise ValueError("Only contiguous memory is supported")
        return MemoryBuffer(
            data_ptr=int(dlpack_object.data_ptr),
            memory_type=dlpack_object.memory_type,
            memory_type_id=dlpack_object.memory_type_id,
            size=dlpack_object.byte_size,
            owner=owner,
        )
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from enum import IntEnum
class MemoryType(IntEnum):
    """Where a buffer lives: host memory, page-locked host memory, or device
    memory. Member names double as the wire-level strings."""

    CPU = 0
    CPU_PINNED = 1
    GPU = 2


def string_to_memory_type(memory_type_string: str) -> MemoryType:
    """Convert a memory-type name (e.g. "GPU") into a MemoryType member.

    Raises ValueError for names that are not MemoryType members.
    """
    member = MemoryType.__members__.get(memory_type_string)
    if member is None:
        raise ValueError(
            f"Unsupported Memory Type String. Can't convert {memory_type_string} to MemoryType"
        )
    return member
......@@ -25,7 +25,6 @@ from typing import Dict, Optional
from urllib.parse import urlsplit, urlunsplit
import nats
from tritonserver import InvalidArgumentError
from triton_distributed.icp.protos.icp_pb2 import ModelInferRequest, ModelInferResponse
from triton_distributed.icp.request_plane import (
......@@ -203,7 +202,7 @@ class NatsRequestPlane(RequestPlane):
Optional[nats.js.JetStreamContext.PullSubscription],
]:
if self._jet_stream is None:
raise InvalidArgumentError(
raise ValueError(
"Failed to get model stream: NATS Jetstream not connected!"
)
......@@ -335,19 +334,15 @@ class NatsRequestPlane(RequestPlane):
responses: AsyncIterator[ModelInferResponse] | ModelInferResponse,
):
if self._jet_stream is None:
raise InvalidArgumentError(
"Failed to post response: NATS Jetstream not connected!"
)
raise ValueError("Failed to post response: NATS Jetstream not connected!")
request_id = get_icp_request_id(request)
if request_id is None:
raise InvalidArgumentError("ICP request must have request id")
raise ValueError("ICP request must have request id")
response_to_uri = get_icp_response_to_uri(request)
if not response_to_uri:
raise InvalidArgumentError(
"Attempting to send a response when non requested"
)
raise ValueError("Attempting to send a response when non requested")
parsed = urlsplit(response_to_uri)
response_stream = parsed.path.replace("/", "")
......@@ -378,12 +373,10 @@ class NatsRequestPlane(RequestPlane):
] = None,
) -> AsyncIterator[ModelInferResponse]:
if self._jet_stream is None:
raise InvalidArgumentError(
"Failed to post request: NATS Jetstream not connected!"
)
raise ValueError("Failed to post request: NATS Jetstream not connected!")
if response_iterator and response_handler:
raise InvalidArgumentError(
raise ValueError(
"Can only specify either response handler or response iterator"
)
......
......@@ -13,14 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Abstract Class for interacting with the Triton Inference Serving Platform Inter-Component Protocol Control Plane"""
"""Abstract Class for interacting with the Triton Distributed Inter-Component Protocol Control Plane"""
import abc
import uuid
from typing import AsyncIterator, Awaitable, Callable, Optional
from tritonserver import TritonError
from triton_distributed.icp.protos.icp_pb2 import ModelInferRequest, ModelInferResponse
ICP_REQUEST_ID = "icp_request_id"
......@@ -33,6 +31,10 @@ ICP_REQUEST_CANCELLED = "icp_request_cancelled"
ICP_ERROR = "icp_response_error"
class RequestPlaneError(Exception):
pass
def get_icp_request_id(
message: ModelInferRequest | ModelInferResponse,
) -> uuid.UUID | None:
......@@ -47,13 +49,15 @@ def set_icp_request_id(
message.parameters[ICP_REQUEST_ID].string_param = str(value)
def get_icp_response_error(message: ModelInferResponse) -> TritonError | None:
def get_icp_response_error(message: ModelInferResponse) -> RequestPlaneError | None:
if ICP_ERROR not in message.parameters:
return None
return TritonError(message.parameters[ICP_ERROR].string_param)
return RequestPlaneError(message.parameters[ICP_ERROR].string_param)
def set_icp_response_error(message: ModelInferResponse, value: TritonError) -> None:
def set_icp_response_error(
message: ModelInferResponse, value: RequestPlaneError
) -> None:
message.parameters[ICP_ERROR].string_param = str(value)
......
This diff is collapsed.
......@@ -25,10 +25,8 @@ from urllib.parse import urlsplit
import cupy
import numpy
import tritonserver
import ucp
from cupy_backends.cuda.api.runtime import CUDARuntimeError
from tritonserver import InvalidArgumentError, MemoryBuffer, MemoryType, Tensor
from triton_distributed.icp.data_plane import (
DataPlane,
......@@ -48,7 +46,11 @@ from triton_distributed.icp.data_plane import (
set_icp_tensor_size,
set_icp_tensor_uri,
)
from triton_distributed.icp.data_type import DataType
from triton_distributed.icp.memory_buffer import MemoryBuffer
from triton_distributed.icp.memory_type import MemoryType
from triton_distributed.icp.protos.icp_pb2 import ModelInferRequest, ModelInferResponse
from triton_distributed.icp.tensor import Tensor
LOGGER = logging.getLogger(__name__)
......@@ -175,20 +177,18 @@ class _UcpDataPlane(DataPlane):
if tensor_id in self._tensor_store:
tensor = self._tensor_store[tensor_id]
array_module = numpy
if tensor.memory_type == tritonserver.MemoryType.CPU:
if tensor.memory_type == MemoryType.CPU:
array_module = numpy
device_manager = contextlib.nullcontext()
elif tensor.memory_type == tritonserver.MemoryType.GPU:
elif tensor.memory_type == MemoryType.GPU:
array_module = cupy
device_manager = cupy.cuda.Device(
tensor.memory_buffer.memory_type_id
)
else:
raise InvalidArgumentError(
f"Invalid Memory Type {tensor.memory_type}"
)
raise ValueError(f"Invalid Memory Type {tensor.memory_type}")
with device_manager:
if tensor.data_type == tritonserver.DataType.BYTES:
if tensor.data_type == DataType.BYTES:
array = tensor.memory_buffer.owner
else:
array = array_module.from_dlpack(tensor)
......@@ -343,7 +343,7 @@ class _UcpDataPlane(DataPlane):
if requested_memory_type is not None:
memory_type = requested_memory_type
if memory_type == tritonserver.MemoryType.GPU and self._cuda_is_available:
if memory_type == MemoryType.GPU and self._cuda_is_available:
array_module = cupy
if requested_memory_type_id is not None:
device_manager = cupy.cuda.Device(requested_memory_type_id)
......
......@@ -24,10 +24,11 @@ import numpy
import pytest
import ucp
from cupy_backends.cuda.api.runtime import CUDARuntimeError
from tritonserver import DataType, MemoryType, Tensor
from tritonserver._api._datautils import TRITON_TO_NUMPY_DTYPE
from triton_distributed.icp.data_plane import DataPlaneError
from triton_distributed.icp.data_type import DATA_TYPE_TO_NUMPY_DTYPE, DataType
from triton_distributed.icp.memory_type import MemoryType
from triton_distributed.icp.tensor import Tensor
from triton_distributed.icp.ucp_data_plane import (
UcpDataPlane,
get_icp_tensor_uri,
......@@ -283,7 +284,7 @@ def test_requested_memory_type(memory_type, memory_type_id, request):
def _get_random_tensor(data_type: DataType, size: Sequence[int]):
dtype = TRITON_TO_NUMPY_DTYPE[data_type]
dtype = DATA_TYPE_TO_NUMPY_DTYPE[data_type]
value = numpy.random.rand(*size)
return value.astype(dtype)
......
[build-system]
requires = ["setuptools>=65.0", "setuptools-scm>=8"]
build-backend = "setuptools.build_meta"
[project]
name = "triton_distributed.runtime"
dynamic = ["version"]
dependencies = ["triton_distributed.icp >= 0"]
[tool.setuptools_scm]
version_file = "src/triton_distributed/runtime/_version.py"
root = "../.."
[tool.setuptools.packages.find]
where = ["src"]
include = ["triton_distributed.runtime*"]
namespaces = true
......@@ -24,8 +24,13 @@ from triton_distributed.runtime.remote_request import (
from triton_distributed.runtime.remote_response import (
RemoteInferenceResponse as RemoteInferenceResponse,
)
from triton_distributed.runtime.triton_core_operator import (
try:
from triton_distributed.runtime.triton_core_operator import (
TritonCoreOperator as TritonCoreOperator,
)
)
except ImportError:
pass
from triton_distributed.runtime.worker import Worker as Worker
from triton_distributed.runtime.worker import WorkerConfig as WorkerConfig
......@@ -16,8 +16,6 @@ import multiprocessing
from pprint import pformat
from typing import Optional, Type
from tritonserver import InvalidArgumentError
from triton_distributed.icp import (
DataPlane,
NatsRequestPlane,
......@@ -71,7 +69,7 @@ class Deployment:
if self._default_request_plane == NatsRequestPlane:
self.request_plane_server = NatsServer(log_dir=self._default_log_dir)
else:
raise InvalidArgumentError(
raise ValueError(
f"Unknown Request Plane Type, can not initialize {self._default_request_plane}"
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment