ModelZoo / Qwen_lmdeploy · Commits

Commit fcefbf3d, authored Nov 30, 2023 by xiabo
Reorganize the project
parent d592fbea
Changes: 170 files
Showing 20 changed files with 0 additions and 8704 deletions.
3rdparty/core-r22.12/cmake/TritonCoreConfig.cmake.in          +0  -37
3rdparty/core-r22.12/include/triton/core/tritonbackend.h      +0  -1410
3rdparty/core-r22.12/include/triton/core/tritonrepoagent.h    +0  -417
3rdparty/core-r22.12/include/triton/core/tritonserver.h       +0  -2360
3rdparty/core-r22.12/src/backend_config.cc                    +0  -225
3rdparty/core-r22.12/src/backend_config.h                     +0  -77
3rdparty/core-r22.12/src/backend_manager.cc                   +0  -383
3rdparty/core-r22.12/src/backend_manager.h                    +0  -174
3rdparty/core-r22.12/src/backend_memory_manager.cc            +0  -149
3rdparty/core-r22.12/src/backend_memory_manager.h             +0  -36
3rdparty/core-r22.12/src/backend_model.cc                     +0  -1301
3rdparty/core-r22.12/src/backend_model.h                      +0  -133
3rdparty/core-r22.12/src/backend_model_instance.cc            +0  -966
3rdparty/core-r22.12/src/backend_model_instance.h             +0  -200
3rdparty/core-r22.12/src/buffer_attributes.cc                 +0  -104
3rdparty/core-r22.12/src/buffer_attributes.h                  +0  -79
3rdparty/core-r22.12/src/constants.h                          +0  -108
3rdparty/core-r22.12/src/cuda_memory_manager.cc               +0  -197
3rdparty/core-r22.12/src/cuda_memory_manager.h                +0  -85
3rdparty/core-r22.12/src/cuda_utils.cc                        +0  -263
Too many changes to show. To preserve performance only 170 of 170+ files are displayed.
3rdparty/core-r22.12/cmake/TritonCoreConfig.cmake.in  deleted 100644 → 0
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONCORE_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONCORE_CMAKE_DIR})
if(NOT TARGET TritonCore::triton-core-serverapi)
include("${TRITONCORE_CMAKE_DIR}/TritonCoreTargets.cmake")
endif()
3rdparty/core-r22.12/include/triton/core/tritonbackend.h  deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include "triton/core/tritonserver.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _COMPILING_TRITONBACKEND
#if defined(_MSC_VER)
#define TRITONBACKEND_DECLSPEC __declspec(dllexport)
#define TRITONBACKEND_ISPEC __declspec(dllimport)
#elif defined(__GNUC__)
#define TRITONBACKEND_DECLSPEC __attribute__((__visibility__("default")))
#define TRITONBACKEND_ISPEC
#else
#define TRITONBACKEND_DECLSPEC
#define TRITONBACKEND_ISPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONBACKEND_DECLSPEC __declspec(dllimport)
#define TRITONBACKEND_ISPEC __declspec(dllexport)
#else
#define TRITONBACKEND_DECLSPEC
#define TRITONBACKEND_ISPEC
#endif
#endif
struct TRITONBACKEND_MemoryManager;
struct TRITONBACKEND_Input;
struct TRITONBACKEND_Output;
struct TRITONBACKEND_State;
struct TRITONBACKEND_Request;
struct TRITONBACKEND_ResponseFactory;
struct TRITONBACKEND_Response;
struct TRITONBACKEND_Backend;
struct TRITONBACKEND_Model;
struct TRITONBACKEND_ModelInstance;
struct TRITONBACKEND_BackendAttribute;
///
/// TRITONBACKEND API Version
///
/// The TRITONBACKEND API is versioned with major and minor version
/// numbers. Any change to the API that does not impact backwards
/// compatibility (for example, adding a non-required function)
/// increases the minor version number. Any change that breaks
/// backwards compatibility (for example, deleting or changing the
/// behavior of a function) increases the major version number. A
/// backend should check that the API version used to compile the
/// backend is compatible with the API version of the Triton server
/// that it is running in. This is typically done by code similar to
/// the following which makes sure that the major versions are equal
/// and that the minor version of Triton is >= the minor version used
/// to build the backend.
///
/// uint32_t api_version_major, api_version_minor;
/// TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor);
/// if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
/// (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
/// return TRITONSERVER_ErrorNew(
/// TRITONSERVER_ERROR_UNSUPPORTED,
/// "triton backend API version does not support this backend");
/// }
///
#define TRITONBACKEND_API_VERSION_MAJOR 1
#define TRITONBACKEND_API_VERSION_MINOR 10
/// Get the TRITONBACKEND API version supported by Triton. This value
/// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and
/// TRITONBACKEND_API_VERSION_MINOR used to build the backend to
/// ensure that Triton is compatible with the backend.
///
/// \param major Returns the TRITONBACKEND API major version supported
/// by Triton.
/// \param minor Returns the TRITONBACKEND API minor version supported
/// by Triton.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ApiVersion(
    uint32_t* major, uint32_t* minor);
/// TRITONBACKEND_ArtifactType
///
/// The ways that the files that make up a backend or model are
/// communicated to the backend.
///
/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The model or backend
/// artifacts are made available to Triton via a locally
/// accessible filesystem. The backend can access these files
/// using an appropriate system API.
///
typedef enum TRITONBACKEND_artifacttype_enum {
  TRITONBACKEND_ARTIFACT_FILESYSTEM
} TRITONBACKEND_ArtifactType;
///
/// TRITONBACKEND_MemoryManager
///
/// Object representing a memory manager that is capable of
/// allocating and otherwise managing different memory types. For
/// improved performance Triton maintains pools for GPU and CPU-pinned
/// memory and the memory manager allows backends to access those
/// pools.
///
/// Allocate a contiguous block of memory of a specific type using a
/// memory manager. Two error codes have specific interpretations for
/// this function:
///
/// TRITONSERVER_ERROR_UNSUPPORTED: Indicates that Triton is
/// incapable of allocating the requested memory type and memory
/// type ID. Requests for the memory type and ID will always fail
/// no matter 'byte_size' of the request.
///
/// TRITONSERVER_ERROR_UNAVAILABLE: Indicates that Triton can
/// allocate the memory type and ID but that currently it cannot
/// allocate a contiguous block of memory of the requested
/// 'byte_size'.
///
/// \param manager The memory manager.
/// \param buffer Returns the allocated memory.
/// \param memory_type The type of memory to allocate.
/// \param memory_type_id The ID associated with the memory type to
/// allocate. For GPU memory this indicates the device ID of the GPU
/// to allocate from.
/// \param byte_size The size of memory to allocate, in bytes.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_MemoryManagerAllocate(
    TRITONBACKEND_MemoryManager* manager, void** buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id,
    const uint64_t byte_size);
/// Free a buffer that was previously allocated with
/// TRITONBACKEND_MemoryManagerAllocate. The call must provide the
/// same values for 'memory_type' and 'memory_type_id' as were used
/// when the buffer was allocated or else the behavior is undefined.
///
/// \param manager The memory manager.
/// \param buffer The allocated memory buffer to free.
/// \param memory_type The type of memory of the buffer.
/// \param memory_type_id The ID associated with the memory type of
/// the buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_MemoryManagerFree(
    TRITONBACKEND_MemoryManager* manager, void* buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id);
///
/// TRITONBACKEND_Input
///
/// Object representing an input tensor.
///
/// Get the name and properties of an input tensor. The returned
/// strings and other properties are owned by the input, not the
/// caller, and so should not be modified or freed.
///
/// \param input The input tensor.
/// \param name If non-nullptr, returns the tensor name.
/// \param datatype If non-nullptr, returns the tensor datatype.
/// \param shape If non-nullptr, returns the tensor shape.
/// \param dim_count If non-nullptr, returns the number of dimensions
/// in the tensor shape.
/// \param byte_size If non-nullptr, returns the size of the available
/// data for the tensor, in bytes. This size reflects the actual data
/// available, and does not necessarily match what is
/// expected/required for the tensor given its shape and datatype. It
/// is the responsibility of the backend to handle mismatches in these
/// sizes appropriately.
/// \param buffer_count If non-nullptr, returns the number of buffers
/// holding the contents of the tensor. These buffers are accessed
/// using TRITONBACKEND_InputBuffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputProperties(
    TRITONBACKEND_Input* input, const char** name,
    TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count);
/// Get the name and properties of an input tensor associated with a given
/// host policy. If there are no input buffers for the specified host policy,
/// the properties of the fallback input buffers are returned. The returned
/// strings and other properties are owned by the input, not the caller, and so
/// should not be modified or freed.
///
/// \param input The input tensor.
/// \param host_policy_name The host policy name. Fallback input properties
/// will be returned if nullptr is provided.
/// \param name If non-nullptr, returns the tensor name.
/// \param datatype If non-nullptr, returns the tensor datatype.
/// \param shape If non-nullptr, returns the tensor shape.
/// \param dim_count If non-nullptr, returns the number of dimensions
/// in the tensor shape.
/// \param byte_size If non-nullptr, returns the size of the available
/// data for the tensor, in bytes. This size reflects the actual data
/// available, and does not necessarily match what is
/// expected/required for the tensor given its shape and datatype. It
/// is the responsibility of the backend to handle mismatches in these
/// sizes appropriately.
/// \param buffer_count If non-nullptr, returns the number of buffers
/// holding the contents of the tensor. These buffers are accessed
/// using TRITONBACKEND_InputBufferForHostPolicy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputPropertiesForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count);
/// Get a buffer holding (part of) the tensor data for an input. For a
/// given input the number of buffers composing the input are found
/// from 'buffer_count' returned by TRITONBACKEND_InputProperties. The
/// returned buffer is owned by the input and so should not be
/// modified or freed by the caller. The lifetime of the buffer
/// matches that of the input and so the buffer should not be accessed
/// after the input tensor object is released.
///
/// \param input The input tensor.
/// \param index The index of the buffer. Must be 0 <= index <
/// buffer_count, where buffer_count is the value returned by
/// TRITONBACKEND_InputProperties.
/// \param buffer Returns a pointer to a contiguous block of data for
/// the named input.
/// \param buffer_byte_size Returns the size, in bytes, of 'buffer'.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the function caller. Returns
/// the actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the function caller.
/// Returns the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputBuffer(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    uint64_t* buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id);
/// Get a buffer holding (part of) the tensor data for an input for a specific
/// host policy. If there are no input buffers specified for this host policy,
/// the fallback input buffer is returned.
/// For a given input the number of buffers composing the input are found
/// from 'buffer_count' returned by TRITONBACKEND_InputPropertiesForHostPolicy.
/// The returned buffer is owned by the input and so should not be modified or
/// freed by the caller. The lifetime of the buffer matches that of the input
/// and so the buffer should not be accessed after the input tensor object is
/// released.
///
/// \param input The input tensor.
/// \param host_policy_name The host policy name. Fallback input buffer
/// will be returned if nullptr is provided.
/// \param index The index of the buffer. Must be 0 <= index <
/// buffer_count, where buffer_count is the value returned by
/// TRITONBACKEND_InputPropertiesForHostPolicy.
/// \param buffer Returns a pointer to a contiguous block of data for
/// the named input.
/// \param buffer_byte_size Returns the size, in bytes, of 'buffer'.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the function caller. Returns
/// the actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the function caller.
/// Returns the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBufferForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const uint32_t index, const void** buffer, uint64_t* buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Get the buffer attributes associated with the given input buffer. For a
/// given input the number of buffers composing the input are found from
/// 'buffer_count' returned by TRITONBACKEND_InputProperties. The returned
/// 'buffer_attributes' is owned by the input and so should not be modified or
/// freed by the caller. The lifetime of the 'buffer_attributes' matches that of
/// the input and so the 'buffer_attributes' should not be accessed after the
/// input tensor object is released.
///
/// \param input The input tensor.
/// \param index The index of the buffer. Must be 0 <= index < buffer_count,
/// where buffer_count is the value returned by TRITONBACKEND_InputProperties.
/// \param buffer Returns a pointer to a contiguous block of data for
/// the named input.
/// \param buffer_attributes Returns the attributes for the given buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputBufferAttributes(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    TRITONSERVER_BufferAttributes** buffer_attributes);
///
/// TRITONBACKEND_Output
///
/// Object representing a response output tensor.
///
/// Get a buffer to use to hold the tensor data for the output. The
/// returned buffer is owned by the output and so should not be freed
/// by the caller. The caller can and should fill the buffer with the
/// output data for the tensor. The lifetime of the buffer matches
/// that of the output and so the buffer should not be accessed after
/// the output tensor object is released.
///
/// \param buffer Returns a pointer to a buffer where the contents of
/// the output tensor should be placed.
/// \param buffer_byte_size The size, in bytes, of the buffer required
/// by the caller.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the caller. Returns the
/// actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the caller. Returns
/// the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_OutputBuffer(
    TRITONBACKEND_Output* output, void** buffer,
    const uint64_t buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id);
/// Get the buffer attributes associated with the given output buffer. The
/// returned 'buffer_attributes' is owned by the output and so should not be
/// modified or freed by the caller. The lifetime of the 'buffer_attributes'
/// matches that of the output and so the 'buffer_attributes' should not be
/// accessed after the output tensor object is released. This function must be
/// called after the TRITONBACKEND_OutputBuffer otherwise it might contain
/// incorrect data.
///
/// \param output The output tensor.
/// \param buffer_attributes Returns the attributes for the output buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_OutputBufferAttributes(
    TRITONBACKEND_Output* output,
    TRITONSERVER_BufferAttributes** buffer_attributes);
///
/// TRITONBACKEND_Request
///
/// Object representing an inference request.
///
/// Get the ID of the request. Can be nullptr if request doesn't have
/// an ID. The returned string is owned by the request, not the
/// caller, and so should not be modified or freed.
///
/// \param request The inference request.
/// \param id Returns the ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestId(
    TRITONBACKEND_Request* request, const char** id);
/// Get the correlation ID of the request if it is an unsigned integer.
/// Zero indicates that the request does not have a correlation ID.
/// Returns failure if correlation ID for given request is not an unsigned
/// integer.
///
/// \param request The inference request.
/// \param id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestCorrelationId(
    TRITONBACKEND_Request* request, uint64_t* id);
/// Get the correlation ID of the request if it is a string.
/// Empty string indicates that the request does not have a correlation ID.
/// Returns error if correlation ID for given request is not a string.
///
/// \param request The inference request.
/// \param id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestCorrelationIdString(
    TRITONBACKEND_Request* request, const char** id);
/// Get the flag(s) associated with a request. On return 'flags' holds
/// a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for
/// available flags.
///
/// \param request The inference request.
/// \param flags Returns the flags.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestFlags(
    TRITONBACKEND_Request* request, uint32_t* flags);
/// Get the number of input tensors specified in the request.
///
/// \param request The inference request.
/// \param count Returns the number of input tensors.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputCount(
    TRITONBACKEND_Request* request, uint32_t* count);
/// Get the name of an input tensor. The caller does not own
/// the returned string and must not modify or delete it. The lifetime
/// of the returned string extends only as long as 'request'.
///
/// \param request The inference request.
/// \param index The index of the input tensor. Must be 0 <= index <
/// count, where count is the value returned by
/// TRITONBACKEND_RequestInputCount.
/// \param input_name Returns the name of the input tensor
/// corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** input_name);
/// Get a named request input. The lifetime of the returned input
/// object matches that of the request and so the input object should
/// not be accessed after the request object is released.
///
/// \param request The inference request.
/// \param name The name of the input.
/// \param input Returns the input corresponding to the name.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInput(
    TRITONBACKEND_Request* request, const char* name,
    TRITONBACKEND_Input** input);
/// Get a request input by index. The order of inputs in a given
/// request is not necessarily consistent with other requests, even if
/// the requests are in the same batch. As a result, you can not
/// assume that an index obtained from one request will point to the
/// same input in a different request.
///
/// The lifetime of the returned input object matches that of the
/// request and so the input object should not be accessed after the
/// request object is released.
///
/// \param request The inference request.
/// \param index The index of the input tensor. Must be 0 <= index <
/// count, where count is the value returned by
/// TRITONBACKEND_RequestInputCount.
/// \param input Returns the input corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputByIndex(
    TRITONBACKEND_Request* request, const uint32_t index,
    TRITONBACKEND_Input** input);
/// Get the number of output tensors requested to be returned in the
/// request.
///
/// \param request The inference request.
/// \param count Returns the number of output tensors.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestOutputCount(
    TRITONBACKEND_Request* request, uint32_t* count);
/// Get the name of a requested output tensor. The caller does not own
/// the returned string and must not modify or delete it. The lifetime
/// of the returned string extends only as long as 'request'.
///
/// \param request The inference request.
/// \param index The index of the requested output tensor. Must be 0
/// <= index < count, where count is the value returned by
/// TRITONBACKEND_RequestOutputCount.
/// \param output_name Returns the name of the requested output tensor
/// corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestOutputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** output_name);
/// Returns the preferred memory type and memory type ID of the output buffer
/// for the request. As much as possible, Triton will attempt to return
/// the same memory_type and memory_type_id values that will be returned by
/// the subsequent call to TRITONBACKEND_OutputBuffer, however, the backend must
/// be capable of handling cases where the values differ.
///
/// \param request The request.
/// \param name The name of the output tensor. This is optional
/// and it should be set to nullptr to indicate that the tensor name has
/// not been determined.
/// \param byte_size The expected size of the buffer. This is optional
/// and it should be set to nullptr to indicate that the byte size has
/// not been determined.
/// \param memory_type Acts as both input and output. On input gives
/// the memory type preferred by the caller. Returns memory type preferred
/// by Triton, taken account of the caller preferred type.
/// \param memory_type_id Acts as both input and output. On input gives
/// the memory type ID preferred by the caller. Returns memory type ID preferred
/// by Triton, taken account of the caller preferred type ID.
/// \return a TRITONSERVER_Error object if a failure occurs.
/// A TRITONSERVER_ERROR_UNAVAILABLE error indicates that the properties are not
/// available, other error codes indicate an error.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputBufferProperties(
    TRITONBACKEND_Request* request, const char* name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Release the request. The request should be released when it is no
/// longer needed by the backend. If this call returns with an error
/// (i.e. non-nullptr) then the request was not released and ownership
/// remains with the backend. If this call returns with success, the
/// 'request' object is no longer owned by the backend and must not be
/// used. Any tensor names, data types, shapes, input tensors,
/// etc. returned by TRITONBACKEND_Request* functions for this request
/// are no longer valid. If a persistent copy of that data is required
/// it must be created before calling this function.
///
/// \param request The inference request.
/// \param release_flags Flags indicating what type of request release
/// should be performed. \see TRITONSERVER_RequestReleaseFlag. \see
/// TRITONSERVER_InferenceRequestReleaseFn_t.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestRelease(
    TRITONBACKEND_Request* request, uint32_t release_flags);
///
/// TRITONBACKEND_ResponseFactory
///
/// Object representing an inference response factory. Using a
/// response factory is not required; instead a response can be
/// generated directly from a TRITONBACKEND_Request object using
/// TRITONBACKEND_ResponseNew(). A response factory allows a request
/// to be released before all responses have been sent. Releasing a
/// request as early as possible releases all input tensor data and
/// therefore may be desirable in some cases.
/// Create the response factory associated with a request.
///
/// \param factory Returns the new response factory.
/// \param request The inference request.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseFactoryNew(
    TRITONBACKEND_ResponseFactory** factory, TRITONBACKEND_Request* request);
/// Destroy a response factory.
///
/// \param factory The response factory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseFactoryDelete(
    TRITONBACKEND_ResponseFactory* factory);
/// Send response flags without a corresponding response.
///
/// \param factory The response factory.
/// \param send_flags Flags to send. \see
/// TRITONSERVER_ResponseCompleteFlag. \see
/// TRITONSERVER_InferenceResponseCompleteFn_t.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactorySendFlags(
    TRITONBACKEND_ResponseFactory* factory, const uint32_t send_flags);
///
/// TRITONBACKEND_Response
///
/// Object representing an inference response. For a given request,
/// the backend must carefully manage the lifecycle of responses
/// generated for that request to ensure that the output tensor
/// buffers are allocated correctly. When a response is created with
/// TRITONBACKEND_ResponseNew or TRITONBACKEND_ResponseNewFromFactory,
/// all the outputs and corresponding buffers must be created for that
/// response using TRITONBACKEND_ResponseOutput and
/// TRITONBACKEND_OutputBuffer *before* another response is created
/// for the request. For a given response, outputs can be created in
/// any order but they must be created sequentially/synchronously (for
/// example, the backend cannot use multiple threads to simultaneously
/// add multiple outputs to a response).
///
/// The above requirement applies only to responses being generated
/// for a given request. The backend may generate responses in
/// parallel on multiple threads as long as those responses are for
/// different requests.
///
/// This order of response creation must be strictly followed. But,
/// once response(s) are created they do not need to be sent
/// immediately, nor do they need to be sent in the order they were
/// created. The backend may even delete a created response instead of
/// sending it by using TRITONBACKEND_ResponseDelete.
/// Create a response for a request.
///
/// \param response Returns the new response.
/// \param request The request.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseNew(
    TRITONBACKEND_Response** response, TRITONBACKEND_Request* request);
/// Create a response using a factory.
///
/// \param response Returns the new response.
/// \param factory The response factory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseNewFromFactory(
    TRITONBACKEND_Response** response, TRITONBACKEND_ResponseFactory* factory);
/// Destroy a response. It is not necessary to delete a response if
/// TRITONBACKEND_ResponseSend is called as that function transfers
/// ownership of the response object to Triton.
///
/// \param response The response.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseDelete(
    TRITONBACKEND_Response* response);
/// Set a string parameter in the response.
///
/// \param response The response.
/// \param name The name of the parameter.
/// \param value The value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetStringParameter(
    TRITONBACKEND_Response* response, const char* name, const char* value);
/// Set an integer parameter in the response.
///
/// \param response The response.
/// \param name The name of the parameter.
/// \param value The value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetIntParameter(
    TRITONBACKEND_Response* response, const char* name, const int64_t value);
/// Set a boolean parameter in the response.
///
/// \param response The response.
/// \param name The name of the parameter.
/// \param value The value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetBoolParameter(
    TRITONBACKEND_Response* response, const char* name, const bool value);
/// Create an output tensor in the response. The lifetime of the
/// returned output tensor object matches that of the response and so
/// the output tensor object should not be accessed after the response
/// object is deleted.
///
/// \param response The response.
/// \param output Returns the new response output.
/// \param name The name of the output tensor.
/// \param datatype The datatype of the output tensor.
/// \param shape The shape of the output tensor.
/// \param dims_count The number of dimensions in the output tensor
/// shape.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseOutput(
    TRITONBACKEND_Response* response, TRITONBACKEND_Output** output,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count);
/// Send a response. Calling this function transfers ownership of the
/// response object to Triton. The caller must not access or delete
/// the response object after calling this function.
///
/// \param response The response.
/// \param send_flags Flags associated with the response. \see
/// TRITONSERVER_ResponseCompleteFlag. \see
/// TRITONSERVER_InferenceResponseCompleteFn_t.
/// \param error The TRITONSERVER_Error to send if the response is an
/// error, or nullptr if the response is successful.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseSend(
    TRITONBACKEND_Response* response, const uint32_t send_flags,
    TRITONSERVER_Error* error);
///
/// TRITONBACKEND_State
///
/// Object representing a state.
///
/// Create a state in the request. The returned state object is only valid
/// before the TRITONBACKEND_StateUpdate is called. The state should not be
/// freed by the caller. If TRITONBACKEND_StateUpdate is not called, the
/// lifetime of the state matches the lifetime of the request. If the state name
/// does not exist in the "state" section of the model configuration, the state
/// will not be created and an error will be returned. If this function is
/// called when sequence batching is not enabled or there is no 'states' section
/// in the sequence batching section of the model configuration, this call will
/// return an error.
///
/// \param state Returns the new state.
/// \param request The request.
/// \param name The name of the state.
/// \param datatype The datatype of the state.
/// \param shape The shape of the state.
/// \param dims_count The number of dimensions in the state shape.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateNew(
    TRITONBACKEND_State** state, TRITONBACKEND_Request* request,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count);
/// Update the state for the sequence. Calling this function will replace the
/// state stored for this sequence in Triton with 'state' provided in the
/// function argument. If this function is called when sequence batching is not
/// enabled or there is no 'states' section in the sequence batching section of
/// the model configuration, this call will return an error. The backend is not
/// required to call this function. If the backend doesn't call
/// TRITONBACKEND_StateUpdate function, this particular state for the sequence
/// will not be updated and the next inference request in the sequence will use
/// the same state as the current inference request.
///
/// \param state The state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateUpdate(
    TRITONBACKEND_State* state);
/// Get a buffer to use to hold the tensor data for the state. The returned
/// buffer is owned by the state and so should not be freed by the caller. The
/// caller can and should fill the buffer with the state data. The buffer must
/// not be accessed by the backend after TRITONBACKEND_StateUpdate is called.
/// The caller should fill the buffer before calling TRITONBACKEND_StateUpdate.
///
/// \param state The state.
/// \param buffer Returns a pointer to a buffer where the contents of the state
/// should be placed.
/// \param buffer_byte_size The size, in bytes, of the buffer required
/// by the caller.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the caller. Returns the
/// actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the caller. Returns
/// the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateBuffer(
    TRITONBACKEND_State* state, void** buffer, const uint64_t buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Get the buffer attributes associated with the given state buffer.
/// The returned 'buffer_attributes' is owned by the state and so should not be
/// modified or freed by the caller. The lifetime of the 'buffer_attributes'
/// matches that of the state.
///
/// \param state The state.
/// \param buffer_attributes Returns the buffer attributes for the given state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateBufferAttributes(
    TRITONBACKEND_State* state,
    TRITONSERVER_BufferAttributes** buffer_attributes);
///
/// TRITONBACKEND_Backend
///
/// Object representing a backend.
///
/// TRITONBACKEND_ExecutionPolicy
///
/// Types of execution policy that can be implemented by a backend.
///
/// TRITONBACKEND_EXECUTION_BLOCKING: An instance of the model
/// blocks in TRITONBACKEND_ModelInstanceExecute until it is ready
/// to handle another inference. Upon returning from
/// TRITONBACKEND_ModelInstanceExecute, Triton may immediately
/// call TRITONBACKEND_ModelInstanceExecute for the same instance
/// to execute a new batch of requests. Thus, most backends using
/// this policy will not return from
/// TRITONBACKEND_ModelInstanceExecute until all responses have
/// been sent and all requests have been released. This is the
/// default execution policy.
///
/// TRITONBACKEND_EXECUTION_DEVICE_BLOCKING: An instance, A, of the
/// model blocks in TRITONBACKEND_ModelInstanceExecute if the
/// device associated with the instance is unable to handle
/// another inference. Even if another instance, B, associated
/// with the device, is available and ready to perform an
/// inference, Triton will not invoke
/// TRITONBACKEND_ModelInstanceExecute for B until A returns from
/// TRITONBACKEND_ModelInstanceExecute. Triton will not be blocked
/// from calling TRITONBACKEND_ModelInstanceExecute for instance
/// C, which is associated with a different device than A and B,
/// even if A or B has not returned from
/// TRITONBACKEND_ModelInstanceExecute. This execution policy is
/// typically used by a backend that can cooperatively execute
/// multiple model instances on the same device.
///
typedef enum TRITONBACKEND_execpolicy_enum {
  TRITONBACKEND_EXECUTION_BLOCKING,
  TRITONBACKEND_EXECUTION_DEVICE_BLOCKING
} TRITONBACKEND_ExecutionPolicy;
/// Get the name of the backend. The caller does not own the returned
/// string and must not modify or delete it. The lifetime of the
/// returned string extends only as long as 'backend'.
///
/// \param backend The backend.
/// \param name Returns the name of the backend.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendName(
    TRITONBACKEND_Backend* backend, const char** name);
/// Get the backend configuration. The 'backend_config' message is
/// owned by Triton and should not be modified or freed by the caller.
///
/// The backend configuration, as JSON, is:
///
/// {
/// "cmdline" : {
/// "<setting>" : "<value>",
/// ...
/// }
/// }
///
/// \param backend The backend.
/// \param backend_config Returns the backend configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendConfig(
    TRITONBACKEND_Backend* backend, TRITONSERVER_Message** backend_config);
/// Get the execution policy for this backend. By default the
/// execution policy is TRITONBACKEND_EXECUTION_BLOCKING.
///
/// \param backend The backend.
/// \param policy Returns the execution policy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy* policy);
/// Set the execution policy for this backend. By default the
/// execution policy is TRITONBACKEND_EXECUTION_BLOCKING. Triton reads
/// the backend's execution policy after calling
/// TRITONBACKEND_Initialize, so to be recognized changes to the
/// execution policy must be made in TRITONBACKEND_Initialize.
/// Also, note that if using sequence batcher for the model, Triton will
/// use TRITONBACKEND_EXECUTION_BLOCKING policy irrespective of the
/// policy specified by this setter function.
///
/// \param backend The backend.
/// \param policy The execution policy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendSetExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy policy);
/// Get the location of the files that make up the backend
/// implementation. This location contains the backend shared library
/// and any other files located with the shared library. The
/// 'location' communicated depends on how the backend is being
/// communicated to Triton as indicated by 'artifact_type'.
///
/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The backend artifacts are
/// made available to Triton via the local filesystem. 'location'
/// returns the full path to the directory containing this
/// backend's artifacts. The returned string is owned by Triton,
/// not the caller, and so should not be modified or freed.
///
/// \param backend The backend.
/// \param artifact_type Returns the artifact type for the backend.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendArtifacts(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location);
/// Get the memory manager associated with a backend.
///
/// \param backend The backend.
/// \param manager Returns the memory manager.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendMemoryManager(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_MemoryManager** manager);
/// Get the user-specified state associated with the backend. The
/// state is completely owned and managed by the backend.
///
/// \param backend The backend.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendState(
    TRITONBACKEND_Backend* backend, void** state);
/// Set the user-specified state associated with the backend. The
/// state is completely owned and managed by the backend.
///
/// \param backend The backend.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendSetState(
    TRITONBACKEND_Backend* backend, void* state);
///
/// TRITONBACKEND_Model
///
/// Object representing a model implemented using the backend.
///
/// Get the name of the model. The returned string is owned by the
/// model object, not the caller, and so should not be modified or
/// freed.
///
/// \param model The model.
/// \param name Returns the model name.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelName(
    TRITONBACKEND_Model* model, const char** name);
/// Get the version of the model.
///
/// \param model The model.
/// \param version Returns the model version.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelVersion(
    TRITONBACKEND_Model* model, uint64_t* version);
/// Get the location of the files that make up the model. The
/// 'location' communicated depends on how the model is being
/// communicated to Triton as indicated by 'artifact_type'.
///
/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The model artifacts are made
/// available to Triton via the local filesystem. 'location'
/// returns the full path to the directory in the model repository
/// that contains this model's artifacts. The returned string is
/// owned by Triton, not the caller, and so should not be modified
/// or freed.
///
/// \param model The model.
/// \param artifact_type Returns the artifact type for the model.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelRepository(
    TRITONBACKEND_Model* model, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location);
/// Get the model configuration. The caller takes ownership of the
/// message object and must call TRITONSERVER_MessageDelete to release
/// the object. The configuration is available via this call even
/// before the model is loaded and so can be used in
/// TRITONBACKEND_ModelInitialize. TRITONSERVER_ServerModelConfig
/// returns equivalent information but is not useable until after the
/// model loads.
///
/// \param model The model.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned. Currently only version 1 is supported.
/// \param model_config Returns the model configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message** model_config);
/// Whether the backend should attempt to auto-complete the model configuration.
/// If true, the model should fill the inputs, outputs, and max batch size in
/// the model configuration if incomplete. If the model configuration is
/// changed, the new configuration must be reported to Triton using
/// TRITONBACKEND_ModelSetConfig.
///
/// \param model The model.
/// \param auto_complete_config Returns whether the backend should auto-complete
/// the model configuration.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelAutoCompleteConfig(
    TRITONBACKEND_Model* model, bool* auto_complete_config);
/// Set the model configuration in Triton server. This API should only be called
/// when the backend implements the auto-completion of model configuration
/// and TRITONBACKEND_ModelAutoCompleteConfig returns true in
/// auto_complete_config. Only the inputs, outputs, max batch size, and
/// scheduling choice can be changed. A caveat being scheduling choice can only
/// be changed if none is previously set. Any other changes to the model
/// configuration will be ignored by Triton. This function can only be called
/// from TRITONBACKEND_ModelInitialize, calling in any other context will result
/// in an error being returned. Additionally, Triton server can add some of the
/// missing fields in the provided config with this call. The backend must get
/// the complete configuration again by using TRITONBACKEND_ModelConfig.
/// TRITONBACKEND_ModelSetConfig does not take ownership of the message object
/// and so the caller should call TRITONSERVER_MessageDelete to release the
/// object once the function returns.
///
/// \param model The model.
/// \param config_version The format version of the model configuration.
/// If the configuration is not represented in the version's format
/// then an error will be returned. Currently only version 1 is supported.
/// \param model_config The updated model configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelSetConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message* model_config);
/// Get the TRITONSERVER_Server object that this model is being served
/// by.
///
/// \param model The model.
/// \param server Returns the server.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelServer(
    TRITONBACKEND_Model* model, TRITONSERVER_Server** server);
/// Get the backend used by the model.
///
/// \param model The model.
/// \param backend Returns the backend object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelBackend(
    TRITONBACKEND_Model* model, TRITONBACKEND_Backend** backend);
/// Get the user-specified state associated with the model. The
/// state is completely owned and managed by the backend.
///
/// \param model The model.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelState(
    TRITONBACKEND_Model* model, void** state);
/// Set the user-specified state associated with the model. The
/// state is completely owned and managed by the backend.
///
/// \param model The model.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelSetState(
    TRITONBACKEND_Model* model, void* state);
///
/// TRITONBACKEND_ModelInstance
///
/// Object representing a model instance implemented using the
/// backend.
///
/// Get the name of the model instance. The returned string is owned by the
/// model object, not the caller, and so should not be modified or
/// freed.
///
/// \param instance The model instance.
/// \param name Returns the instance name.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceName(
    TRITONBACKEND_ModelInstance* instance, const char** name);
/// Get the kind of the model instance.
///
/// \param instance The model instance.
/// \param kind Returns the instance kind.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceKind(
    TRITONBACKEND_ModelInstance* instance,
    TRITONSERVER_InstanceGroupKind* kind);
/// Get the device ID of the model instance.
///
/// \param instance The model instance.
/// \param device_id Returns the instance device ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceDeviceId(
    TRITONBACKEND_ModelInstance* instance, int32_t* device_id);
/// Get the host policy setting. The 'host_policy' message is
/// owned by Triton and should not be modified or freed by the caller.
///
/// The host policy setting, as JSON, is:
///
/// {
/// "<host_policy>" : {
/// "<setting>" : "<value>",
/// ...
/// }
/// }
///
/// \param instance The model instance.
/// \param host_policy Returns the host policy setting as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceHostPolicy(
    TRITONBACKEND_ModelInstance* instance, TRITONSERVER_Message** host_policy);
/// Whether the model instance is passive.
///
/// \param instance The model instance.
/// \param is_passive Returns true if the instance is passive, false otherwise
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceIsPassive(
    TRITONBACKEND_ModelInstance* instance, bool* is_passive);
/// Get the number of optimization profiles to be loaded for the instance.
///
/// \param instance The model instance.
/// \param count Returns the number of optimization profiles.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count);
/// Get the name of an optimization profile. The caller does not own
/// the returned string and must not modify or delete it. The lifetime
/// of the returned string extends only as long as 'instance'.
///
/// \param instance The model instance.
/// \param index The index of the optimization profile. Must be 0
/// <= index < count, where count is the value returned by
/// TRITONBACKEND_ModelInstanceProfileCount.
/// \param profile_name Returns the name of the optimization profile
/// corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileName(
    TRITONBACKEND_ModelInstance* instance, const uint32_t index,
    const char** profile_name);
/// Get the number of secondary devices configured for the instance.
///
/// \param instance The model instance.
/// \param count Returns the number of secondary devices.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count);
/// Get the properties of an indexed secondary device. The returned
/// strings and other properties are owned by the instance, not the
/// caller, and so should not be modified or freed.
///
/// \param instance The model instance.
/// \param index The index of the secondary device. Must be 0
/// <= index < count, where count is the value returned by
/// TRITONBACKEND_ModelInstanceSecondaryDeviceCount.
/// \param kind Returns the kind of secondary device corresponding
/// to the index.
/// \param id Returns the id of secondary device corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
    TRITONBACKEND_ModelInstance* instance, uint32_t index, const char** kind,
    int64_t* id);
/// Get the model associated with a model instance.
///
/// \param instance The model instance.
/// \param model Returns the model object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceModel(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Model** model);
/// Get the user-specified state associated with the model
/// instance. The state is completely owned and managed by the
/// backend.
///
/// \param instance The model instance.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceState(
    TRITONBACKEND_ModelInstance* instance, void** state);
/// Set the user-specified state associated with the model
/// instance. The state is completely owned and managed by the
/// backend.
///
/// \param instance The model instance.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceSetState(
    TRITONBACKEND_ModelInstance* instance, void* state);
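/// For illustration only (not part of this header), a backend would typically
/// attach its own per-instance object in TRITONBACKEND_ModelInstanceInitialize
/// and fetch it back later; 'InstanceState' is a hypothetical backend type and
/// error handling is omitted:
///
///   InstanceState* istate = new InstanceState();
///   TRITONBACKEND_ModelInstanceSetState(instance, istate);
///   // ... later, for example in TRITONBACKEND_ModelInstanceExecute ...
///   void* vstate;
///   TRITONBACKEND_ModelInstanceState(instance, &vstate);
///   InstanceState* fetched = reinterpret_cast<InstanceState*>(vstate);
///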
/// Record statistics for an inference request.
///
/// Set 'success' true to indicate that the inference request
/// completed successfully. In this case all timestamps should be
/// non-zero values reported in nanoseconds and should be collected
/// using std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
/// Set 'success' to false to indicate that the inference request failed
/// to complete successfully. In this case all timestamps values are
/// ignored.
///
/// For consistency of measurement across different backends, the
/// timestamps should be collected at the following points during
/// TRITONBACKEND_ModelInstanceExecute.
///
/// TRITONBACKEND_ModelInstanceExecute()
///   CAPTURE TIMESTAMP (exec_start_ns)
///   < process input tensors to prepare them for inference
///     execution, including copying the tensors to/from GPU if
///     necessary>
///   CAPTURE TIMESTAMP (compute_start_ns)
///   < perform inference computations to produce outputs >
///   CAPTURE TIMESTAMP (compute_end_ns)
///   < allocate output buffers and extract output tensors, including
///     copying the tensors to/from GPU if necessary>
///   CAPTURE TIMESTAMP (exec_end_ns)
/// return
///
/// Note that these statistics are associated with a valid
/// TRITONBACKEND_Request object and so must be reported before the
/// request is released. For backends that release the request before
/// all response(s) are sent, these statistics cannot capture
/// information about the time required to produce the response.
///
/// \param instance The model instance.
/// \param request The inference request that statistics are being
/// reported for.
/// \param success True if the inference request completed
/// successfully, false if it failed to complete.
/// \param exec_start_ns Timestamp for the start of execution.
/// \param compute_start_ns Timestamp for the start of execution
/// computations.
/// \param compute_end_ns Timestamp for the end of execution
/// computations.
/// \param exec_end_ns Timestamp for the end of execution.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportStatistics(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request* request,
    const bool success, const uint64_t exec_start_ns,
    const uint64_t compute_start_ns, const uint64_t compute_end_ns,
    const uint64_t exec_end_ns);
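/// For illustration only (not part of this header), a backend might capture
/// and report these timestamps as sketched below; NowNs() is a hypothetical
/// helper returning std::chrono::steady_clock::now().time_since_epoch()
/// converted to nanoseconds:
///
///   uint64_t exec_start_ns = NowNs();
///   // ... prepare input tensors ...
///   uint64_t compute_start_ns = NowNs();
///   // ... run inference ...
///   uint64_t compute_end_ns = NowNs();
///   // ... extract output tensors ...
///   uint64_t exec_end_ns = NowNs();
///   TRITONBACKEND_ModelInstanceReportStatistics(
///       instance, request, true /* success */, exec_start_ns,
///       compute_start_ns, compute_end_ns, exec_end_ns);
///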
/// Record statistics for the execution of an entire batch of
/// inference requests.
///
/// All timestamps should be non-zero values reported in nanoseconds
/// and should be collected using
/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
/// See TRITONBACKEND_ModelInstanceReportStatistics for more information about
/// the timestamps.
///
/// 'batch_size' is the sum of the batch sizes for the individual
/// requests that were delivered together in the call to
/// TRITONBACKEND_ModelInstanceExecute. For example, if three requests
/// are passed to TRITONBACKEND_ModelInstanceExecute and those
/// requests have batch size 1, 2, and 3; then 'batch_size' should be
/// set to 6.
///
/// \param instance The model instance.
/// \param batch_size Combined batch size of all the individual
/// requests executed in the batch.
/// \param exec_start_ns Timestamp for the start of execution.
/// \param compute_start_ns Timestamp for the start of execution
/// computations.
/// \param compute_end_ns Timestamp for the end of execution
/// computations.
/// \param exec_end_ns Timestamp for the end of execution.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportBatchStatistics(
    TRITONBACKEND_ModelInstance* instance, const uint64_t batch_size,
    const uint64_t exec_start_ns, const uint64_t compute_start_ns,
    const uint64_t compute_end_ns, const uint64_t exec_end_ns);
///
/// The following functions can be implemented by a backend. Functions
/// indicated as required must be implemented or the backend will fail
/// to load.
///
/// Initialize a backend. This function is optional, a backend is not
/// required to implement it. This function is called once when a
/// backend is loaded to allow the backend to initialize any state
/// associated with the backend. A backend has a single state that is
/// shared across all models that use the backend.
///
/// \param backend The backend.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Initialize(
    TRITONBACKEND_Backend* backend);
/// Finalize for a backend. This function is optional, a backend is
/// not required to implement it. This function is called once, just
/// before the backend is unloaded. All state associated with the
/// backend should be freed and any threads created for the backend
/// should be exited/joined before returning from this function.
///
/// \param backend The backend.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Finalize(
    TRITONBACKEND_Backend* backend);
/// Initialize for a model. This function is optional, a backend is
/// not required to implement it. This function is called once when a
/// model that uses the backend is loaded to allow the backend to
/// initialize any state associated with the model. The backend should
/// also examine the model configuration to determine if the
/// configuration is suitable for the backend. Any errors reported by
/// this function will prevent the model from loading.
///
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInitialize(
    TRITONBACKEND_Model* model);
/// Finalize for a model. This function is optional, a backend is not
/// required to implement it. This function is called once for a
/// model, just before the model is unloaded from Triton. All state
/// associated with the model should be freed and any threads created
/// for the model should be exited/joined before returning from this
/// function.
///
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelFinalize(
    TRITONBACKEND_Model* model);
/// Initialize for a model instance. This function is optional, a
/// backend is not required to implement it. This function is called
/// once when a model instance is created to allow the backend to
/// initialize any state associated with the instance.
///
/// \param instance The model instance.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceInitialize(
    TRITONBACKEND_ModelInstance* instance);
/// Finalize for a model instance. This function is optional, a
/// backend is not required to implement it. This function is called
/// once for an instance, just before the corresponding model is
/// unloaded from Triton. All state associated with the instance
/// should be freed and any threads created for the instance should be
/// exited/joined before returning from this function.
///
/// \param instance The model instance.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceFinalize(
    TRITONBACKEND_ModelInstance* instance);
/// Execute a batch of one or more requests on a model instance. This
/// function is required. Triton will not perform multiple
/// simultaneous calls to this function for a given model 'instance';
/// however, there may be simultaneous calls for different model
/// instances (for the same or different models).
///
/// If an error is returned the ownership of the request objects
/// remains with Triton and the backend must not retain references to
/// the request objects or access them in any way.
///
/// If success is returned, ownership of the request objects is
/// transferred to the backend and it is then responsible for creating
/// responses and releasing the request objects. Note that even though
/// ownership of the request objects is transferred to the backend, the
/// ownership of the buffer holding request pointers is returned back
/// to Triton upon return from TRITONBACKEND_ModelInstanceExecute. If
/// any request objects need to be maintained beyond
/// TRITONBACKEND_ModelInstanceExecute, then the pointers must be copied
/// out of the array within TRITONBACKEND_ModelInstanceExecute.
///
/// \param instance The model instance.
/// \param requests The requests.
/// \param request_count The number of requests in the batch.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count);
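/// For illustration only (not part of this header), a minimal well-formed
/// execute implementation takes ownership of each request, produces its
/// response(s), and then releases the request; response creation is elided,
/// and TRITONBACKEND_RequestRelease / TRITONSERVER_REQUEST_RELEASE_ALL come
/// from other parts of these Triton headers:
///
///   TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(
///       TRITONBACKEND_ModelInstance* instance,
///       TRITONBACKEND_Request** requests, const uint32_t request_count)
///   {
///     for (uint32_t r = 0; r < request_count; ++r) {
///       // ... create and send response(s) for requests[r] ...
///       TRITONBACKEND_RequestRelease(
///           requests[r], TRITONSERVER_REQUEST_RELEASE_ALL);
///     }
///     return nullptr;  // success
///   }
///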
/// Query the backend for different model attributes. This function is
/// optional; a backend is not required to implement it, nor is it required
/// to set every backend attribute listed. This function is called when
/// Triton requires further backend / model information to perform operations.
/// This function may be called multiple times within the lifetime of the
/// backend (between TRITONBACKEND_Initialize and TRITONBACKEND_Finalize).
/// The backend may return an error to indicate failure to set the backend
/// attributes, in which case the attributes specified in the same function
/// call will be ignored. Triton will apply the specified attributes only if
/// 'nullptr' (success) is returned.
///
/// \param backend The backend.
/// \param backend_attributes Return the backend attribute.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_GetBackendAttribute(
    TRITONBACKEND_Backend* backend,
    TRITONBACKEND_BackendAttribute* backend_attributes);
/// TRITONBACKEND_BackendAttribute
///
/// API to modify attributes associated with a backend.
///
/// Add the preferred instance group of the backend. This function
/// can be called multiple times to cover the different instance group kinds
/// that the backend supports, in priority order: the first call describes
/// the most preferred group. When instance groups are not explicitly
/// provided, Triton will use this attribute to create a model deployment
/// that better aligns with the backend's preference.
///
/// \param backend_attributes The backend attributes object.
/// \param kind The kind of the instance group.
/// \param count The number of instances per device. The Triton default will
/// be used if 0 is provided.
/// \param device_ids The devices where instances should be available. The
/// Triton default will be used if 'nullptr' is provided.
/// \param id_count The number of devices in 'device_ids'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup(
    TRITONBACKEND_BackendAttribute* backend_attributes,
    const TRITONSERVER_InstanceGroupKind kind, const uint64_t count,
    const uint64_t* device_ids, const uint64_t id_count);
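/// For illustration only (not part of this header), a backend that prefers a
/// single GPU instance per device could report that preference like this:
///
///   TRITONSERVER_Error* TRITONBACKEND_GetBackendAttribute(
///       TRITONBACKEND_Backend* backend,
///       TRITONBACKEND_BackendAttribute* backend_attributes)
///   {
///     // Most preferred: GPU instances, one per device, on the default devices.
///     return TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup(
///         backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_GPU,
///         1 /* count */, nullptr /* device_ids */, 0 /* id_count */);
///   }
///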
#ifdef __cplusplus
}
#endif
3rdparty/core-r22.12/include/triton/core/tritonrepoagent.h deleted 100644 → 0
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include "triton/core/tritonserver.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _COMPILING_TRITONREPOAGENT
#if defined(_MSC_VER)
#define TRITONREPOAGENT_DECLSPEC __declspec(dllexport)
#define TRITONREPOAGENT_ISPEC __declspec(dllimport)
#elif defined(__GNUC__)
#define TRITONREPOAGENT_DECLSPEC __attribute__((__visibility__("default")))
#define TRITONREPOAGENT_ISPEC
#else
#define TRITONREPOAGENT_DECLSPEC
#define TRITONREPOAGENT_ISPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONREPOAGENT_DECLSPEC __declspec(dllimport)
#define TRITONREPOAGENT_ISPEC __declspec(dllexport)
#else
#define TRITONREPOAGENT_DECLSPEC
#define TRITONREPOAGENT_ISPEC
#endif
#endif
struct TRITONREPOAGENT_Agent;
struct TRITONREPOAGENT_AgentModel;
///
/// TRITONREPOAGENT API Version
///
/// The TRITONREPOAGENT API is versioned with major and minor version
/// numbers. Any change to the API that does not impact backwards
/// compatibility (for example, adding a non-required function)
/// increases the minor version number. Any change that breaks
/// backwards compatibility (for example, deleting or changing the
/// behavior of a function) increases the major version number. A
/// repository agent should check that the API version used to compile
/// the agent is compatible with the API version of the Triton server
/// that it is running in. This is typically done by code similar to
/// the following which makes sure that the major versions are equal
/// and that the minor version of Triton is >= the minor version used
/// to build the agent.
///
/// uint32_t api_version_major, api_version_minor;
/// TRITONREPOAGENT_ApiVersion(&api_version_major, &api_version_minor);
/// if ((api_version_major != TRITONREPOAGENT_API_VERSION_MAJOR) ||
/// (api_version_minor < TRITONREPOAGENT_API_VERSION_MINOR)) {
/// return TRITONSERVER_ErrorNew(
/// TRITONSERVER_ERROR_UNSUPPORTED,
/// "triton repository agent API version does not support this agent");
/// }
///
#define TRITONREPOAGENT_API_VERSION_MAJOR 0
#define TRITONREPOAGENT_API_VERSION_MINOR 1
/// Get the TRITONREPOAGENT API version supported by Triton. This
/// value can be compared against the
/// TRITONREPOAGENT_API_VERSION_MAJOR and
/// TRITONREPOAGENT_API_VERSION_MINOR used to build the agent to
/// ensure that Triton is compatible with the agent.
///
/// \param major Returns the TRITONREPOAGENT API major version supported
/// by Triton.
/// \param minor Returns the TRITONREPOAGENT API minor version supported
/// by Triton.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ApiVersion(
    uint32_t* major, uint32_t* minor);
/// TRITONREPOAGENT_ArtifactType
///
/// The ways that the files that make up a model's repository content
/// are communicated between Triton and the agent.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
/// communicated to and from the repository agent via a locally
/// accessible filesystem. The agent can access these files using
/// an appropriate filesystem API.
///
/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
/// communicated to and from the repository agent via a remote filesystem.
/// The remote filesystem path follows the same convention as is used for
/// repository paths, for example, "s3://" prefix indicates an S3 path.
///
typedef enum TRITONREPOAGENT_artifacttype_enum {
  TRITONREPOAGENT_ARTIFACT_FILESYSTEM,
  TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM
} TRITONREPOAGENT_ArtifactType;
/// TRITONREPOAGENT_ActionType
///
/// Types of repository actions that can be handled by an agent.
/// The lifecycle of a TRITONREPOAGENT_AgentModel begins with a call to
/// TRITONREPOAGENT_ModelInitialize and ends with a call to
/// TRITONREPOAGENT_ModelFinalize. Between those calls the current lifecycle
/// state of the model is communicated by calls to TRITONREPOAGENT_ModelAction.
/// Possible lifecycles are:
///
/// LOAD -> LOAD_COMPLETE -> UNLOAD -> UNLOAD_COMPLETE
/// LOAD -> LOAD_FAIL
///
/// TRITONREPOAGENT_ACTION_LOAD: A model is being loaded.
///
/// TRITONREPOAGENT_ACTION_LOAD_COMPLETE: The model load completed
/// successfully and the model is now loaded.
///
/// TRITONREPOAGENT_ACTION_LOAD_FAIL: The model load did not complete
/// successfully. The model is not loaded.
///
/// TRITONREPOAGENT_ACTION_UNLOAD: The model is being unloaded.
///
/// TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete.
///
typedef enum TRITONREPOAGENT_actiontype_enum {
  TRITONREPOAGENT_ACTION_LOAD,
  TRITONREPOAGENT_ACTION_LOAD_COMPLETE,
  TRITONREPOAGENT_ACTION_LOAD_FAIL,
  TRITONREPOAGENT_ACTION_UNLOAD,
  TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE
} TRITONREPOAGENT_ActionType;
/// Get the location of the files that make up the model. The
/// 'location' communicated depends on how the model is being
/// communicated to the agent as indicated by 'artifact_type'.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
/// made available to the agent via the local
/// filesystem. 'location' returns the full path to the directory
/// in the model repository that contains the model's
/// artifacts. The returned location string is owned by Triton,
/// not the caller, and so should not be modified or freed. The
/// contents of the directory are owned by Triton, not the agent,
/// and so the agent should not delete or modify the contents. Use
/// TRITONREPOAGENT_RepositoryAcquire to get a location that can be
/// used to modify the model repository contents.
///
/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
/// made available to the agent via a remote filesystem.
/// 'location' returns the full path to the remote directory that contains
/// the model's artifacts. The returned location string is owned by Triton,
/// not the caller, and so should not be modified or freed. The contents of
/// the remote directory are owned by Triton, not the agent,
/// and so the agent should not delete or modify the contents.
/// Use TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location
/// that can be used to write updated model repository contents.
///
/// \param agent The agent.
/// \param model The model.
/// \param artifact_type Returns the artifact type for the location.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryLocation(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    TRITONREPOAGENT_ArtifactType* artifact_type, const char** location);
/// Acquire a location where the agent can produce a new version of
/// the model repository files. This is a convenience method to create
/// a temporary directory for the agent. The agent is responsible for
/// calling TRITONREPOAGENT_ModelRepositoryLocationDelete in
/// TRITONREPOAGENT_ModelFinalize to delete the location. Initially the
/// acquired location is empty. The 'location' communicated depends on
/// the requested 'artifact_type'.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The location is a directory
/// on the local filesystem. 'location' returns the full path to
/// an empty directory that the agent should populate with the
/// model's artifacts. The returned location string is owned by
/// Triton, not the agent, and so should not be modified or freed.
///
/// \param agent The agent.
/// \param model The model.
/// \param artifact_type The artifact type for the location.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryLocationAcquire(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ArtifactType artifact_type, const char** location);
/// Discard and release ownership of a previously acquired location
/// and its contents. The agent must not access or modify the location
/// or its contents after this call.
///
/// \param agent The agent.
/// \param model The model.
/// \param path The location to release.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryLocationRelease(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const char* location);
/// Inform Triton that the specified repository location should be used for
/// the model in place of the original model repository. This method can only be
/// called when TRITONREPOAGENT_ModelAction is invoked with
/// TRITONREPOAGENT_ACTION_LOAD. The 'location'
/// communicated depends on how the repository is being
/// communicated to Triton as indicated by 'artifact_type'.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
/// made available to Triton via the local filesystem. 'location' returns
/// the full path to the directory. Ownership of the contents of the
/// returned directory is transferred to Triton and the agent must not
/// modify or free the contents until TRITONREPOAGENT_ModelFinalize.
/// The local filesystem directory can be created using
/// TRITONREPOAGENT_ModelRepositoryLocationAcquire or the agent can use
/// its own local filesystem API.
///
/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
/// made available to Triton via a remote filesystem. 'location' returns
/// the full path to the remote filesystem directory. Ownership of the
/// contents of the returned directory is transferred to Triton and
/// the agent must not modify or free the contents until
/// TRITONREPOAGENT_ModelFinalize.
///
/// \param agent The agent.
/// \param model The model.
/// \param artifact_type The artifact type for the location.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryUpdate(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ArtifactType artifact_type, const char* location);
/// Get the number of agent parameters defined for a model.
///
/// \param agent The agent.
/// \param model The model.
/// \param count Returns the number of parameters.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelParameterCount(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    uint32_t* count);
/// Get a parameter name and value. The caller does not own the
/// returned strings and must not modify or delete them.
///
/// \param agent The agent.
/// \param model The model.
/// \param index The index of the parameter. Must be 0 <= index <
/// count, where count is the value returned by
/// TRITONREPOAGENT_ModelParameterCount.
/// \param parameter_name Returns the name of the parameter.
/// \param parameter_value Returns the value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelParameter(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const uint32_t index, const char** parameter_name,
    const char** parameter_value);
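/// For illustration only (not part of this header), an agent can enumerate the
/// parameters passed to it like this; error handling is omitted:
///
///   uint32_t count;
///   TRITONREPOAGENT_ModelParameterCount(agent, model, &count);
///   for (uint32_t i = 0; i < count; ++i) {
///     const char* name;
///     const char* value;
///     TRITONREPOAGENT_ModelParameter(agent, model, i, &name, &value);
///     // ... use 'name' and 'value'; do not modify or free them ...
///   }
///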
/// Get the model configuration. The caller takes ownership of the
/// message object and must call TRITONSERVER_MessageDelete to release
/// the object. If the model repository does not contain a
/// config.pbtxt file then 'model_config' is returned as nullptr.
///
/// \param agent The agent.
/// \param model The model.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned. Currently only version 1 is supported.
/// \param model_config Returns the model configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelConfig(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const uint32_t config_version, TRITONSERVER_Message** model_config);
/// Get the user-specified state associated with the model.
///
/// \param model The agent model.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelState(
    TRITONREPOAGENT_AgentModel* model, void** state);
/// Set the user-specified state associated with the model.
///
/// \param model The agent model.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelSetState(
    TRITONREPOAGENT_AgentModel* model, void* state);
/// Get the user-specified state associated with the agent.
///
/// \param agent The agent.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_State(
    TRITONREPOAGENT_Agent* agent, void** state);
/// Set the user-specified state associated with the agent.
///
/// \param agent The agent.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_SetState(
    TRITONREPOAGENT_Agent* agent, void* state);
///
/// The following functions can be implemented by an agent. Functions
/// indicated as required must be implemented or the agent will fail
/// to load.
///
/// Initialize an agent. This function is optional. This function is
/// called once when an agent is loaded to allow the agent to
/// initialize any state associated with the agent. An agent has a
/// single state that is shared across all invocations of the agent.
///
/// \param agent The agent.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_Initialize(
    TRITONREPOAGENT_Agent* agent);
/// Finalize for an agent. This function is optional. This function is
/// called once, just before the agent is unloaded. All state
/// associated with the agent should be freed and any threads created
/// for the agent should be exited/joined before returning from this
/// function.
///
/// \param agent The agent.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_Finalize(
    TRITONREPOAGENT_Agent* agent);
/// Initialize a model associated with an agent. This function is optional.
/// This function is called once when an agent model's lifecycle begins to allow
/// the agent model to initialize any state associated with it. An agent model
/// has a single state that is shared across the entire lifecycle of the agent
/// model.
///
/// \param agent The agent to be associated with the model.
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelInitialize(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model);
/// Finalize for a model. This function is optional. This function is
/// called once, just before the end of the agent model's lifecycle. All state
/// associated with the agent model should be freed and any threads created
/// for the agent model should be exited/joined before returning from this
/// function. If the model acquired a model location using
/// TRITONREPOAGENT_ModelRepositoryLocationAcquire, it must call
/// TRITONREPOAGENT_ModelRepositoryLocationRelease to release that location.
///
/// \param agent The agent associated with the model.
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelFinalize(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model);
/// Handle an action for a specified model. This function is
/// required. Triton will not perform multiple simultaneous calls to
/// this function for a given agent and model; however, there may be
/// simultaneous calls for the agent for different models.
///
/// If the agent does not handle the action the agent should
/// immediately return success (nullptr).
///
/// Any modification to the model's repository must be made when 'action_type'
/// is TRITONREPOAGENT_ACTION_LOAD.
/// To modify the model's repository the agent must either acquire a mutable
/// location via TRITONREPOAGENT_ModelRepositoryLocationAcquire
/// or its own managed location, report the location to Triton via
/// TRITONREPOAGENT_ModelRepositoryUpdate, and then return
/// success (nullptr). If the agent does not need to make any changes
/// to the model repository it should not call
/// TRITONREPOAGENT_ModelRepositoryUpdate and then return success.
/// To indicate that a model load should fail return a non-success status.
///
/// \param agent The agent.
/// \param model The model that is the target of the action.
/// \param action_type The type of action the agent should handle for the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelAction(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ActionType action_type);
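/// For illustration only (not part of this header), an agent that only
/// inspects models at load time could implement the required hook as:
///
///   TRITONSERVER_Error* TRITONREPOAGENT_ModelAction(
///       TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
///       const TRITONREPOAGENT_ActionType action_type)
///   {
///     if (action_type != TRITONREPOAGENT_ACTION_LOAD) {
///       return nullptr;  // action not handled, report success
///     }
///     TRITONREPOAGENT_ArtifactType artifact_type;
///     const char* location;
///     TRITONREPOAGENT_ModelRepositoryLocation(
///         agent, model, &artifact_type, &location);
///     // ... inspect 'location'; return a TRITONSERVER_Error to fail the load.
///     return nullptr;
///   }
///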
#ifdef __cplusplus
}
#endif
3rdparty/core-r22.12/include/triton/core/tritonserver.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
/// \file
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _COMPILING_TRITONSERVER
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONSERVER_DECLSPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllimport)
#else
#define TRITONSERVER_DECLSPEC
#endif
#endif
struct TRITONSERVER_BufferAttributes;
struct TRITONSERVER_Error;
struct TRITONSERVER_InferenceRequest;
struct TRITONSERVER_InferenceResponse;
struct TRITONSERVER_InferenceTrace;
struct TRITONSERVER_Message;
struct TRITONSERVER_Metrics;
struct TRITONSERVER_Parameter;
struct TRITONSERVER_ResponseAllocator;
struct TRITONSERVER_Server;
struct TRITONSERVER_ServerOptions;
struct TRITONSERVER_Metric;
struct TRITONSERVER_MetricFamily;
///
/// TRITONSERVER API Version
///
/// The TRITONSERVER API is versioned with major and minor version
/// numbers. Any change to the API that does not impact backwards
/// compatibility (for example, adding a non-required function)
/// increases the minor version number. Any change that breaks
/// backwards compatibility (for example, deleting or changing the
/// behavior of a function) increases the major version number. A
/// client should check that the API version used to compile the
/// client is compatible with the API version of the Triton shared
/// library that it is linking against. This is typically done by code
/// similar to the following which makes sure that the major versions
/// are equal and that the minor version of the Triton shared library
/// is >= the minor version used to build the client.
///
/// uint32_t api_version_major, api_version_minor;
/// TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor);
/// if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) ||
/// (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) {
/// return TRITONSERVER_ErrorNew(
/// TRITONSERVER_ERROR_UNSUPPORTED,
/// "triton server API version does not support this client");
/// }
///
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 17
/// Get the TRITONSERVER API version supported by the Triton shared
/// library. This value can be compared against the
/// TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR
/// used to build the client to ensure that Triton shared library is
/// compatible with the client.
///
/// \param major Returns the TRITONSERVER API major version supported
/// by Triton.
/// \param minor Returns the TRITONSERVER API minor version supported
/// by Triton.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ApiVersion(
    uint32_t* major, uint32_t* minor);
/// TRITONSERVER_DataType
///
/// Tensor data types recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_datatype_enum {
  TRITONSERVER_TYPE_INVALID,
  TRITONSERVER_TYPE_BOOL,
  TRITONSERVER_TYPE_UINT8,
  TRITONSERVER_TYPE_UINT16,
  TRITONSERVER_TYPE_UINT32,
  TRITONSERVER_TYPE_UINT64,
  TRITONSERVER_TYPE_INT8,
  TRITONSERVER_TYPE_INT16,
  TRITONSERVER_TYPE_INT32,
  TRITONSERVER_TYPE_INT64,
  TRITONSERVER_TYPE_FP16,
  TRITONSERVER_TYPE_FP32,
  TRITONSERVER_TYPE_FP64,
  TRITONSERVER_TYPE_BYTES,
  TRITONSERVER_TYPE_BF16
} TRITONSERVER_DataType;
/// Get the string representation of a data type. The returned string
/// is not owned by the caller and so should not be modified or freed.
///
/// \param datatype The data type.
/// \return The string representation of the data type.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_DataTypeString(
    TRITONSERVER_DataType datatype);
/// Get the Triton datatype corresponding to a string representation
/// of a datatype.
///
/// \param dtype The datatype string representation.
/// \return The Triton data type or TRITONSERVER_TYPE_INVALID if the
/// string does not represent a data type.
TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType(
    const char* dtype);
/// Get the size of a Triton datatype in bytes. Zero is returned for
/// TRITONSERVER_TYPE_BYTES because it has a variable size. Zero is
/// returned for TRITONSERVER_TYPE_INVALID.
///
/// \param dtype The datatype.
/// \return The size of the datatype.
TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize(
    TRITONSERVER_DataType datatype);
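/// For illustration only (not part of this header), the byte size of a dense
/// tensor with a fixed-size datatype follows from its shape:
///
///   int64_t shape[2] = {8, 128};
///   uint64_t byte_size = TRITONSERVER_DataTypeByteSize(TRITONSERVER_TYPE_FP32);
///   for (int i = 0; i < 2; ++i) {
///     byte_size *= shape[i];
///   }
///   // byte_size == 8 * 128 * 4 == 4096
///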
/// TRITONSERVER_MemoryType
///
/// Types of memory recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_memorytype_enum {
  TRITONSERVER_MEMORY_CPU,
  TRITONSERVER_MEMORY_CPU_PINNED,
  TRITONSERVER_MEMORY_GPU
} TRITONSERVER_MemoryType;
/// Get the string representation of a memory type. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param memtype The memory type.
/// \return The string representation of the memory type.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_MemoryTypeString(
    TRITONSERVER_MemoryType memtype);
/// TRITONSERVER_ParameterType
///
/// Types of parameters recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_parametertype_enum {
  TRITONSERVER_PARAMETER_STRING,
  TRITONSERVER_PARAMETER_INT,
  TRITONSERVER_PARAMETER_BOOL,
  TRITONSERVER_PARAMETER_BYTES
} TRITONSERVER_ParameterType;
/// Get the string representation of a parameter type. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param paramtype The parameter type.
/// \return The string representation of the parameter type.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_ParameterTypeString(
    TRITONSERVER_ParameterType paramtype);
/// Create a new parameter object. The caller takes ownership of the
/// TRITONSERVER_Parameter object and must call TRITONSERVER_ParameterDelete to
/// release the object. The object will maintain its own copy of the 'value'
///
/// \param name The parameter name.
/// \param type The parameter type.
/// \param value The pointer to the value.
/// \return A new TRITONSERVER_Parameter object. 'nullptr' will be returned if
/// 'type' is 'TRITONSERVER_PARAMETER_BYTES'. The caller should use
/// TRITONSERVER_ParameterBytesNew to create a parameter with bytes type.
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterNew(
    const char* name, const TRITONSERVER_ParameterType type,
    const void* value);
/// Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES.
/// The caller takes ownership of the TRITONSERVER_Parameter object and must
/// call TRITONSERVER_ParameterDelete to release the object. The object only
/// maintains a shallow copy of the 'byte_ptr' so the data content must be
/// valid until the parameter object is deleted.
///
/// \param name The parameter name.
/// \param byte_ptr The pointer to the data content.
/// \param size The size of the data content.
/// \return A new TRITONSERVER_Parameter object.
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterBytesNew(
    const char* name, const void* byte_ptr, const uint64_t size);
/// Delete a parameter object.
///
/// \param parameter The parameter object.
TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete(
    TRITONSERVER_Parameter* parameter);
/// TRITONSERVER_InstanceGroupKind
///
/// Kinds of instance groups recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_instancegroupkind_enum {
  TRITONSERVER_INSTANCEGROUPKIND_AUTO,
  TRITONSERVER_INSTANCEGROUPKIND_CPU,
  TRITONSERVER_INSTANCEGROUPKIND_GPU,
  TRITONSERVER_INSTANCEGROUPKIND_MODEL
} TRITONSERVER_InstanceGroupKind;
/// Get the string representation of an instance-group kind. The
/// returned string is not owned by the caller and so should not be
/// modified or freed.
///
/// \param kind The instance-group kind.
/// \return The string representation of the kind.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_InstanceGroupKindString(
    TRITONSERVER_InstanceGroupKind kind);
/// TRITONSERVER_Logging
///
/// Types/levels of logging.
///
typedef enum TRITONSERVER_loglevel_enum {
  TRITONSERVER_LOG_INFO,
  TRITONSERVER_LOG_WARN,
  TRITONSERVER_LOG_ERROR,
  TRITONSERVER_LOG_VERBOSE
} TRITONSERVER_LogLevel;
///
/// Format of logging.
///
/// TRITONSERVER_LOG_DEFAULT: the log severity (L) and timestamp will be
/// logged as "LMMDD hh:mm:ss.ssssss".
///
/// TRITONSERVER_LOG_ISO8601: the log format will be "YYYY-MM-DDThh:mm:ssZ L".
///
typedef enum TRITONSERVER_logformat_enum {
  TRITONSERVER_LOG_DEFAULT,
  TRITONSERVER_LOG_ISO8601
} TRITONSERVER_LogFormat;
/// Is a log level enabled?
///
/// \param level The log level.
/// \return True if the log level is enabled, false if not enabled.
TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled(
    TRITONSERVER_LogLevel level);
/// Log a message at a given log level if that level is enabled.
///
/// \param level The log level.
/// \param filename The file name of the location of the log message.
/// \param line The line number of the log message.
/// \param msg The log message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_LogMessage(
    TRITONSERVER_LogLevel level, const char* filename, const int line,
    const char* msg);
/// TRITONSERVER_Error
///
/// Errors are reported by a TRITONSERVER_Error object. A NULL
/// TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error
/// indicates error and the code and message for the error can be
/// retrieved from the object.
///
/// The caller takes ownership of a TRITONSERVER_Error object returned by
/// the API and must call TRITONSERVER_ErrorDelete to release the object.
///
/// The TRITONSERVER_Error error codes
typedef enum TRITONSERVER_errorcode_enum {
  TRITONSERVER_ERROR_UNKNOWN,
  TRITONSERVER_ERROR_INTERNAL,
  TRITONSERVER_ERROR_NOT_FOUND,
  TRITONSERVER_ERROR_INVALID_ARG,
  TRITONSERVER_ERROR_UNAVAILABLE,
  TRITONSERVER_ERROR_UNSUPPORTED,
  TRITONSERVER_ERROR_ALREADY_EXISTS
} TRITONSERVER_Error_Code;
/// Create a new error object. The caller takes ownership of the
/// TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to
/// release the object.
///
/// \param code The error code.
/// \param msg The error message.
/// \return A new TRITONSERVER_Error object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ErrorNew(
    TRITONSERVER_Error_Code code, const char* msg);
/// Delete an error object.
///
/// \param error The error object.
TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete(TRITONSERVER_Error* error);
/// Get the error code.
///
/// \param error The error object.
/// \return The error code.
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(
    TRITONSERVER_Error* error);
/// Get the string representation of an error code. The returned
/// string is not owned by the caller and so should not be modified or
/// freed. The lifetime of the returned string extends only as long as
/// 'error' and must not be accessed once 'error' is deleted.
///
/// \param error The error object.
/// \return The string representation of the error code.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_ErrorCodeString(
    TRITONSERVER_Error* error);
/// Get the error message. The returned string is not owned by the
/// caller and so should not be modified or freed. The lifetime of the
/// returned string extends only as long as 'error' and must not be
/// accessed once 'error' is deleted.
///
/// \param error The error object.
/// \return The error message.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_ErrorMessage(
    TRITONSERVER_Error* error);
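/// For illustration only (not part of this header), a common calling pattern
/// is to check the returned error, log its message, and delete it:
///
///   uint32_t major, minor;
///   TRITONSERVER_Error* err = TRITONSERVER_ApiVersion(&major, &minor);
///   if (err != nullptr) {
///     TRITONSERVER_LogMessage(
///         TRITONSERVER_LOG_ERROR, __FILE__, __LINE__,
///         TRITONSERVER_ErrorMessage(err));
///     TRITONSERVER_ErrorDelete(err);
///   }
///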
/// TRITONSERVER_ResponseAllocator
///
/// Object representing a memory allocator for output tensors in an
/// inference response.
///
/// Type for allocation function that allocates a buffer to hold an
/// output tensor.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param tensor_name The name of the output tensor to allocate for.
/// \param byte_size The size of the buffer to allocate.
/// \param memory_type The type of memory that the caller prefers for
/// the buffer allocation.
/// \param memory_type_id The ID of the memory that the caller prefers
/// for the buffer allocation.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param buffer Returns a pointer to the allocated memory.
/// \param buffer_userp Returns a user-specified value to associate
/// with the buffer, or nullptr if no user-specified value should be
/// associated with the buffer. This value will be provided in the
/// call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer
/// is released and will also be returned by
/// TRITONSERVER_InferenceResponseOutput.
/// \param actual_memory_type Returns the type of memory where the
/// allocation resides. May be different than the type of memory
/// requested by 'memory_type'.
/// \param actual_memory_type_id Returns the ID of the memory where
/// the allocation resides. May be different than the ID of the memory
/// requested by 'memory_type_id'.
/// \return a TRITONSERVER_Error object if a failure occurs while
/// attempting an allocation. If an error is returned all other return
/// values will be ignored.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorAllocFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, void* userp, void** buffer, void** buffer_userp,
    TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id);
/// Type for allocation function that allocates a buffer to hold an
/// output tensor with buffer attributes. The callback function must fill in the
/// appropriate buffer attributes information related to this buffer. If set,
/// this function is always called after the
/// TRITONSERVER_ResponseAllocatorAllocFn_t function.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param tensor_name The name of the output tensor to allocate for.
/// \param buffer_attributes The buffer attributes associated with the buffer.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param buffer_userp Returns a user-specified value to associate
/// with the buffer, or nullptr if no user-specified value should be
/// associated with the buffer. This value will be provided in the
/// call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer
/// is released and will also be returned by
/// TRITONSERVER_InferenceResponseOutput.
/// \return a TRITONSERVER_Error object if a failure occurs while
/// attempting an allocation. If an error is returned all other return
/// values will be ignored.
typedef TRITONSERVER_Error* (
    *TRITONSERVER_ResponseAllocatorBufferAttributesFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
    void* buffer_userp);
/// Type for function that is called to query the allocator's preferred memory
/// type and memory type ID. As much as possible, the allocator should attempt
/// to return the same memory_type and memory_type_id values that will be
/// returned by the subsequent call to TRITONSERVER_ResponseAllocatorAllocFn_t.
/// But the allocator is not required to do so.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param tensor_name The name of the output tensor. This is optional
/// and it should be set to nullptr to indicate that the tensor name has
/// not been determined.
/// \param byte_size The expected size of the buffer. This is optional
/// and it should be set to nullptr to indicate that the byte size has
/// not been determined.
/// \param memory_type Acts as both input and output. On input gives
/// the memory type preferred by the caller. Returns the memory type preferred
/// by the allocator, taking into account the caller's preferred type.
/// \param memory_type_id Acts as both input and output. On input gives
/// the memory type ID preferred by the caller. Returns the memory type ID
/// preferred by the allocator, taking into account the caller's preferred
/// type ID.
/// \return a TRITONSERVER_Error object if a failure occurs.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorQueryFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Type for function that is called when the server no longer holds
/// any reference to a buffer allocated by
/// TRITONSERVER_ResponseAllocatorAllocFn_t. In practice this function
/// is typically called when the response object associated with the
/// buffer is deleted by TRITONSERVER_InferenceResponseDelete.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param buffer Pointer to the buffer to be freed.
/// \param buffer_userp The user-specified value associated
/// with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t.
/// \param byte_size The size of the buffer.
/// \param memory_type The type of memory holding the buffer.
/// \param memory_type_id The ID of the memory holding the buffer.
/// \return a TRITONSERVER_Error object if a failure occurs while
/// attempting the release. If an error is returned Triton will not
/// attempt to release the buffer again.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorReleaseFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id);
/// Type for function that is called to indicate that subsequent
/// allocation requests will refer to a new response.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \return a TRITONSERVER_Error object if a failure occurs.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorStartFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, void* userp);
/// Create a new response allocator object.
///
/// The response allocator object is used by Triton to allocate
/// buffers to hold the output tensors in inference responses. Most
/// models generate a single response for each inference request
/// (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of
/// callbacks will be:
///
/// TRITONSERVER_ServerInferAsync called
/// - start_fn : optional (and typically not required)
/// - alloc_fn : called once for each output tensor in response
/// TRITONSERVER_InferenceResponseDelete called
/// - release_fn: called once for each output tensor in response
///
/// For models that generate multiple responses for each inference
/// request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be
/// used to determine sets of alloc_fn callbacks that belong to the
/// same response:
///
/// TRITONSERVER_ServerInferAsync called
/// - start_fn
/// - alloc_fn : called once for each output tensor in response
/// - start_fn
/// - alloc_fn : called once for each output tensor in response
/// ...
/// For each response, TRITONSERVER_InferenceResponseDelete called
/// - release_fn: called once for each output tensor in the response
///
/// In all cases the start_fn, alloc_fn and release_fn callback
/// functions must be thread-safe. Typically making these functions
/// thread-safe does not require explicit locking. The recommended way
/// to implement these functions is to have each inference request
/// provide a 'response_allocator_userp' object that is unique to that
/// request with TRITONSERVER_InferenceRequestSetResponseCallback. The
/// callback functions then operate only on this unique state. Locking
/// is required only when the callback function needs to access state
/// that is shared across inference requests (for example, a common
/// allocation pool).
///
/// \param allocator Returns the new response allocator object.
/// \param alloc_fn The function to call to allocate buffers for result
/// tensors.
/// \param release_fn The function to call when the server no longer
/// holds a reference to an allocated buffer.
/// \param start_fn The function to call to indicate that the
/// subsequent 'alloc_fn' calls are for a new response. This callback
/// is optional (use nullptr to indicate that it should not be
/// invoked).
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorNew(
    TRITONSERVER_ResponseAllocator** allocator,
    TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn,
    TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn,
    TRITONSERVER_ResponseAllocatorStartFn_t start_fn);
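/// For illustration only (not part of this header), a minimal CPU-only
/// allocator pairs a malloc-based alloc_fn with a free-based release_fn;
/// 'ResponseAlloc' and 'ResponseRelease' are hypothetical client functions
/// and error handling is omitted:
///
///   TRITONSERVER_Error* ResponseAlloc(
///       TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
///       size_t byte_size, TRITONSERVER_MemoryType memory_type,
///       int64_t memory_type_id, void* userp, void** buffer,
///       void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
///       int64_t* actual_memory_type_id)
///   {
///     *buffer = (byte_size > 0) ? malloc(byte_size) : nullptr;
///     *buffer_userp = nullptr;
///     *actual_memory_type = TRITONSERVER_MEMORY_CPU;
///     *actual_memory_type_id = 0;
///     return nullptr;  // success
///   }
///
///   TRITONSERVER_Error* ResponseRelease(
///       TRITONSERVER_ResponseAllocator* allocator, void* buffer,
///       void* buffer_userp, size_t byte_size,
///       TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
///   {
///     free(buffer);
///     return nullptr;  // success
///   }
///
///   TRITONSERVER_ResponseAllocator* allocator;
///   TRITONSERVER_ResponseAllocatorNew(
///       &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */);
///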
/// Set the buffer attributes function for a response allocator object.
/// The function will be called after alloc_fn to set the buffer attributes
/// associated with the output buffer.
///
/// The thread-safety requirement for buffer_attributes_fn is the same as for the other
/// allocator callbacks.
///
/// \param allocator The response allocator object.
/// \param buffer_attributes_fn The function to call to get the buffer
/// attributes information for an allocated buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(
    TRITONSERVER_ResponseAllocator* allocator,
    TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn);
/// Set the query function for a response allocator object. The function is
/// typically called before alloc_fn to determine the allocator's preferred
/// memory type and memory type ID in the current situation, so that a
/// different execution decision can be made.
///
/// The thread-safety requirements for query_fn are the same as for the other
/// allocator callbacks.
///
/// \param allocator The response allocator object.
/// \param query_fn The function to call to query allocator's preferred memory
/// type and memory type ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorSetQueryFunction(
    TRITONSERVER_ResponseAllocator* allocator,
    TRITONSERVER_ResponseAllocatorQueryFn_t query_fn);
/// Delete a response allocator.
///
/// \param allocator The response allocator object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorDelete(
    TRITONSERVER_ResponseAllocator* allocator);
/// TRITONSERVER_Message
///
/// Object representing a Triton Server message.
///
/// Create a new message object from serialized JSON string.
///
/// \param message The message object.
/// \param base The base of the serialized JSON.
/// \param byte_size The size, in bytes, of the serialized message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MessageNewFromSerializedJson(
    TRITONSERVER_Message** message, const char* base, size_t byte_size);
/// Delete a message object.
///
/// \param message The message object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageDelete(
    TRITONSERVER_Message* message);
/// Get the base and size of the buffer containing the serialized
/// message in JSON format. The buffer is owned by the
/// TRITONSERVER_Message object and should not be modified or freed by
/// the caller. The lifetime of the buffer extends only as long as
/// 'message' and must not be accessed once 'message' is deleted.
///
/// \param message The message object.
/// \param base Returns the base of the serialized message.
/// \param byte_size Returns the size, in bytes, of the serialized
/// message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageSerializeToJson(
    TRITONSERVER_Message* message, const char** base, size_t* byte_size);
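/// A brief sketch (illustrative only) of creating a message from a JSON
/// string and reading back its serialized form; 'json' is a hypothetical
/// caller-provided, null-terminated JSON document:
///
///   const char* json = "{\"key\":\"value\"}";
///   TRITONSERVER_Message* message = nullptr;
///   TRITONSERVER_Error* err = TRITONSERVER_MessageNewFromSerializedJson(
///       &message, json, strlen(json));
///   if (err == nullptr) {
///     const char* base = nullptr;
///     size_t byte_size = 0;
///     TRITONSERVER_MessageSerializeToJson(message, &base, &byte_size);
///     // 'base'/'byte_size' are valid only until 'message' is deleted.
///     TRITONSERVER_MessageDelete(message);
///   }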
/// TRITONSERVER_Metrics
///
/// Object representing metrics.
///
/// Metric format types
typedef enum tritonserver_metricformat_enum {
  TRITONSERVER_METRIC_PROMETHEUS
} TRITONSERVER_MetricFormat;
/// Delete a metrics object.
///
/// \param metrics The metrics object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsDelete(
    TRITONSERVER_Metrics* metrics);
/// Get a buffer containing the metrics in the specified format. For
/// each format the buffer contains the following:
///
/// TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline
/// string (char*) that gives a text representation of the metrics in
/// prometheus format. 'byte_size' returns the length of the string
/// in bytes.
///
/// The buffer is owned by the 'metrics' object and should not be
/// modified or freed by the caller. The lifetime of the buffer
/// extends only as long as 'metrics' and must not be accessed once
/// 'metrics' is deleted.
///
/// \param metrics The metrics object.
/// \param format The format to use for the returned metrics.
/// \param base Returns a pointer to the base of the formatted
/// metrics, as described above.
/// \param byte_size Returns the size, in bytes, of the formatted
/// metrics.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsFormatted(
    TRITONSERVER_Metrics* metrics, TRITONSERVER_MetricFormat format,
    const char** base, size_t* byte_size);
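/// A short sketch (illustrative only), assuming 'metrics' is a
/// TRITONSERVER_Metrics object previously obtained from the server:
///
///   const char* base = nullptr;
///   size_t byte_size = 0;
///   TRITONSERVER_Error* err = TRITONSERVER_MetricsFormatted(
///       metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size);
///   if (err == nullptr) {
///     // 'base' holds prometheus-format text of length 'byte_size'; it is
///     // owned by 'metrics' and becomes invalid once 'metrics' is deleted.
///   }
///   TRITONSERVER_MetricsDelete(metrics);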
/// TRITONSERVER_InferenceTrace
///
/// Object that represents tracing for an inference request.
///
/// Trace levels. The trace level controls the type of trace
/// activities that are reported for an inference request.
///
/// Trace level values are power-of-2 and can be combined to trace
/// multiple types of activities. For example, use
/// (TRITONSERVER_TRACE_LEVEL_TIMESTAMPS |
/// TRITONSERVER_TRACE_LEVEL_TENSORS) to trace both timestamps and
/// tensors for an inference request.
///
/// TRITONSERVER_TRACE_LEVEL_MIN and TRITONSERVER_TRACE_LEVEL_MAX are
/// deprecated and should not be used.
typedef enum tritonserver_tracelevel_enum {
  /// Tracing disabled. No trace activities are reported.
  TRITONSERVER_TRACE_LEVEL_DISABLED = 0,
  /// Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
  TRITONSERVER_TRACE_LEVEL_MIN = 1,
  /// Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
  TRITONSERVER_TRACE_LEVEL_MAX = 2,
  /// Record timestamps for the inference request.
  TRITONSERVER_TRACE_LEVEL_TIMESTAMPS = 0x4,
  /// Record input and output tensor values for the inference request.
  TRITONSERVER_TRACE_LEVEL_TENSORS = 0x8
} TRITONSERVER_InferenceTraceLevel;
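/// For example (illustrative only), a level that traces both timestamps and
/// tensor values can be formed by combining the flags:
///
///   TRITONSERVER_InferenceTraceLevel level =
///       static_cast<TRITONSERVER_InferenceTraceLevel>(
///           TRITONSERVER_TRACE_LEVEL_TIMESTAMPS |
///           TRITONSERVER_TRACE_LEVEL_TENSORS);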
/// Get the string representation of a trace level. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param level The trace level.
/// \return The string representation of the trace level.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_InferenceTraceLevelString(
    TRITONSERVER_InferenceTraceLevel level);
/// Trace activities
typedef enum tritonserver_traceactivity_enum {
  TRITONSERVER_TRACE_REQUEST_START = 0,
  TRITONSERVER_TRACE_QUEUE_START = 1,
  TRITONSERVER_TRACE_COMPUTE_START = 2,
  TRITONSERVER_TRACE_COMPUTE_INPUT_END = 3,
  TRITONSERVER_TRACE_COMPUTE_OUTPUT_START = 4,
  TRITONSERVER_TRACE_COMPUTE_END = 5,
  TRITONSERVER_TRACE_REQUEST_END = 6,
  TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT = 7,
  TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT = 8,
  TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9
} TRITONSERVER_InferenceTraceActivity;
/// Get the string representation of a trace activity. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param activity The trace activity.
/// \return The string representation of the trace activity.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_InferenceTraceActivityString(
    TRITONSERVER_InferenceTraceActivity activity);
/// Type for trace timeline activity callback function. This callback function
/// is used to report activity occurring for a trace. This function
/// does not take ownership of 'trace' and so any information needed
/// from that object must be copied before returning. The 'userp' data
/// is the same as what is supplied in the call to
/// TRITONSERVER_InferenceTraceNew.
typedef void (*TRITONSERVER_InferenceTraceActivityFn_t)(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    void* userp);
/// Type for trace tensor activity callback function. This callback function
/// is used to report tensor activity occurring for a trace. This function
/// does not take ownership of 'trace' and so any information needed
/// from that object must be copied before returning. The 'userp' data
/// is the same as what is supplied in the call to
/// TRITONSERVER_InferenceTraceTensorNew.
typedef void (*TRITONSERVER_InferenceTraceTensorActivityFn_t)(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, const char* name,
    TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
    const int64_t* shape, uint64_t dim_count,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void* userp);
/// Type for trace release callback function. This callback function
/// is called when all activity for the trace has completed. The
/// callback function takes ownership of the
/// TRITONSERVER_InferenceTrace object. The 'userp' data is the same
/// as what is supplied in the call to TRITONSERVER_InferenceTraceNew.
typedef void (*TRITONSERVER_InferenceTraceReleaseFn_t)(
    TRITONSERVER_InferenceTrace* trace, void* userp);
/// Create a new inference trace object. The caller takes ownership of
/// the TRITONSERVER_InferenceTrace object and must call
/// TRITONSERVER_InferenceTraceDelete to release the object.
///
/// The activity callback function will be called to report activity
/// for 'trace' as well as for any child traces that are spawned by
/// 'trace', and so the activity callback must check the trace object
/// to determine specifically what activity is being reported.
///
/// The release callback is called for both 'trace' and for any child
/// traces spawned by 'trace'.
///
/// \param trace Returns the new inference trace object.
/// \param level The tracing level.
/// \param parent_id The parent trace id for this trace. A value of 0
/// indicates that there is no parent trace.
/// \param activity_fn The callback function where activity for the
/// trace is reported.
/// \param release_fn The callback function called when all activity
/// is complete for the trace.
/// \param trace_userp User-provided pointer that is delivered to
/// the activity and release callback functions.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceNew(
    TRITONSERVER_InferenceTrace** trace,
    TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id,
    TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp);
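/// A minimal sketch (illustrative only), assuming caller-defined callbacks
/// 'MyTraceActivityFn' and 'MyTraceReleaseFn' matching the typedefs above:
///
///   TRITONSERVER_InferenceTrace* trace = nullptr;
///   TRITONSERVER_Error* err = TRITONSERVER_InferenceTraceNew(
///       &trace, TRITONSERVER_TRACE_LEVEL_TIMESTAMPS, 0 /* parent_id */,
///       MyTraceActivityFn, MyTraceReleaseFn, nullptr /* trace_userp */);
///   // On success the trace can be passed to an inference call; Triton
///   // invokes 'MyTraceReleaseFn' when all activity for the trace completes.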
/// Create a new inference trace object. The caller takes ownership of
/// the TRITONSERVER_InferenceTrace object and must call
/// TRITONSERVER_InferenceTraceDelete to release the object.
///
/// The timeline and tensor activity callback function will be called to report
/// activity for 'trace' as well as for any child traces that are spawned by
/// 'trace', and so the activity callback must check the trace object
/// to determine specifically what activity is being reported.
///
/// The release callback is called for both 'trace' and for any child
/// traces spawned by 'trace'.
///
/// \param trace Returns the new inference trace object.
/// \param level The tracing level.
/// \param parent_id The parent trace id for this trace. A value of 0
/// indicates that there is no parent trace.
/// \param activity_fn The callback function where timeline activity for the
/// trace is reported.
/// \param tensor_activity_fn The callback function where tensor activity for
/// the trace is reported.
/// \param release_fn The callback function called when all activity
/// is complete for the trace.
/// \param trace_userp User-provided pointer that is delivered to
/// the activity and release callback functions.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceTensorNew(
    TRITONSERVER_InferenceTrace** trace,
    TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id,
    TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
    TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp);
/// Delete a trace object.
///
/// \param trace The trace object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceDelete(
    TRITONSERVER_InferenceTrace* trace);
/// Get the id associated with a trace. Every trace is assigned an id
/// that is unique across all traces created for a Triton server.
///
/// \param trace The trace.
/// \param id Returns the id associated with the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceId(
    TRITONSERVER_InferenceTrace* trace, uint64_t* id);
/// Get the parent id associated with a trace. The parent id indicates
/// a parent-child relationship between two traces. A parent id value
/// of 0 indicates that there is no parent trace.
///
/// \param trace The trace.
/// \param id Returns the parent id associated with the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceParentId(
    TRITONSERVER_InferenceTrace* trace, uint64_t* parent_id);
/// Get the name of the model associated with a trace. The caller does
/// not own the returned string and must not modify or delete it. The
/// lifetime of the returned string extends only as long as 'trace'.
///
/// \param trace The trace.
/// \param model_name Returns the name of the model associated with
/// the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelName(
    TRITONSERVER_InferenceTrace* trace, const char** model_name);
/// Get the version of the model associated with a trace.
///
/// \param trace The trace.
/// \param model_version Returns the version of the model associated
/// with the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceModelVersion(
    TRITONSERVER_InferenceTrace* trace, int64_t* model_version);
/// TRITONSERVER_InferenceRequest
///
/// Object representing an inference request. The inference request
/// provides the meta-data and input tensor values needed for an
/// inference and returns the inference result meta-data and output
/// tensors. An inference request object can be modified and reused
/// multiple times.
///
/// Inference request flags. The enum values must be power-of-2 values.
typedef enum tritonserver_requestflag_enum {
  TRITONSERVER_REQUEST_FLAG_SEQUENCE_START = 1,
  TRITONSERVER_REQUEST_FLAG_SEQUENCE_END = 2
} TRITONSERVER_RequestFlag;
/// Inference request release flags. The enum values must be
/// power-of-2 values.
typedef enum tritonserver_requestreleaseflag_enum {
  TRITONSERVER_REQUEST_RELEASE_ALL = 1
} TRITONSERVER_RequestReleaseFlag;
/// Inference response complete flags. The enum values must be
/// power-of-2 values.
typedef enum tritonserver_responsecompleteflag_enum {
  TRITONSERVER_RESPONSE_COMPLETE_FINAL = 1
} TRITONSERVER_ResponseCompleteFlag;
/// Type for inference request release callback function. The callback
/// indicates what type of release is being performed on the request
/// and for some of these the callback function takes ownership of the
/// TRITONSERVER_InferenceRequest object. The 'userp' data is the data
/// provided as 'request_release_userp' in the call to
/// TRITONSERVER_InferenceRequestSetReleaseCallback.
///
/// One or more flags will be specified when the callback is invoked,
/// and the callback must take the following actions:
///
/// - TRITONSERVER_REQUEST_RELEASE_ALL: The entire inference request
/// is being released and ownership is passed to the callback
/// function. Triton will no longer access the 'request' object
/// itself nor any input tensor data associated with the
/// request. The callback should free or otherwise manage the
/// 'request' object and all associated tensor data.
///
/// Note that currently TRITONSERVER_REQUEST_RELEASE_ALL should always
/// be set when the callback is invoked but in the future that may
/// change, so the callback should explicitly check for the flag
/// before taking ownership of the request object.
///
typedef void (*TRITONSERVER_InferenceRequestReleaseFn_t)(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp);
/// Type for callback function indicating that an inference response
/// has completed. The callback function takes ownership of the
/// TRITONSERVER_InferenceResponse object. The 'userp' data is the
/// data provided as 'response_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
///
/// One or more flags may be specified when the callback is invoked:
///
/// - TRITONSERVER_RESPONSE_COMPLETE_FINAL: Indicates that no more
/// responses will be generated for a given request (more
/// specifically, that no more responses will be generated for the
/// inference request that set this callback and 'userp'). When
/// this flag is set 'response' may be a response object or may be
/// nullptr. If 'response' is not nullptr, then 'response' is the
/// last response that Triton will produce for the request. If
/// 'response' is nullptr then Triton is indicating that no more
/// responses will be produced for the request.
typedef void (*TRITONSERVER_InferenceResponseCompleteFn_t)(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags,
    void* userp);
/// Create a new inference request object.
///
/// \param inference_request Returns the new request object.
/// \param server the inference server object.
/// \param model_name The name of the model to use for the request.
/// \param model_version The version of the model to use for the
/// request. If -1 then the server will choose a version based on the
/// model's policy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestNew(
    TRITONSERVER_InferenceRequest** inference_request,
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version);
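/// A minimal sketch (illustrative only), assuming 'server' is a live
/// TRITONSERVER_Server; the model name is a placeholder and -1 lets the
/// server choose the version according to the model's policy:
///
///   TRITONSERVER_InferenceRequest* request = nullptr;
///   TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestNew(
///       &request, server, "my_model", -1 /* model_version */);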
/// Delete an inference request object.
///
/// \param inference_request The request object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestDelete(
    TRITONSERVER_InferenceRequest* inference_request);
/// Get the ID for a request. The returned ID is owned by
/// 'inference_request' and must not be modified or freed by the
/// caller.
///
/// \param inference_request The request object.
/// \param id Returns the ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestId(
    TRITONSERVER_InferenceRequest* inference_request, const char** id);
/// Set the ID for a request.
///
/// \param inference_request The request object.
/// \param id The ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetId(
    TRITONSERVER_InferenceRequest* inference_request, const char* id);
/// Get the flag(s) associated with a request. On return 'flags' holds
/// a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for
/// available flags.
///
/// \param inference_request The request object.
/// \param flags Returns the flags.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestFlags(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t* flags);
/// Set the flag(s) associated with a request. 'flags' should hold a
/// bitwise-or of all flag values, see TRITONSERVER_RequestFlag for
/// available flags.
///
/// \param inference_request The request object.
/// \param flags The flags.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetFlags(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t flags);
/// Get the correlation ID of the inference request as an unsigned integer.
/// Default is 0, which indicates that the request has no correlation ID.
/// If the correlation id associated with the inference request is a string,
/// this function will return a failure. The correlation ID is used
/// to indicate that two or more inference requests are related to each other.
/// How this relationship is handled by the inference server is determined by
/// the model's scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestCorrelationId(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t* correlation_id);
/// Get the correlation ID of the inference request as a string.
/// Default is empty "", which indicates that the request has no correlation ID.
/// If the correlation id associated with the inference request is an unsigned
/// integer, then this function will return a failure. The correlation ID
/// is used to indicate that two or more inference requests are related to
/// each other.
/// How this relationship is handled by the inference server is determined by
/// the model's scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestCorrelationIdString(
    TRITONSERVER_InferenceRequest* inference_request,
    const char** correlation_id);
/// Set the correlation ID of the inference request to be an unsigned integer.
/// Default is 0, which indicates that the request has no correlation ID.
/// The correlation ID is used to indicate that two or more inference requests
/// are related to each other. How this relationship is handled by the
/// inference server is determined by the model's scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id The correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetCorrelationId(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t correlation_id);
/// Set the correlation ID of the inference request to be a string.
/// The correlation ID is used to indicate that two or more inference
/// requests are related to each other. How this relationship is
/// handled by the inference server is determined by the model's
/// scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id The correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetCorrelationIdString(
    TRITONSERVER_InferenceRequest* inference_request,
    const char* correlation_id);
/// Get the priority for a request. The default is 0 indicating that
/// the request does not specify a priority and so will use the
/// model's default priority.
///
/// \param inference_request The request object.
/// \param priority Returns the priority level.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestPriority(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t* priority);
/// Set the priority for a request. The default is 0 indicating that
/// the request does not specify a priority and so will use the
/// model's default priority.
///
/// \param inference_request The request object.
/// \param priority The priority level.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetPriority(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t priority);
/// Get the timeout for a request, in microseconds. The default is 0
/// which indicates that the request has no timeout.
///
/// \param inference_request The request object.
/// \param timeout_us Returns the timeout, in microseconds.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestTimeoutMicroseconds(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t* timeout_us);
/// Set the timeout for a request, in microseconds. The default is 0
/// which indicates that the request has no timeout.
///
/// \param inference_request The request object.
/// \param timeout_us The timeout, in microseconds.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t timeout_us);
/// Add an input to a request.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param datatype The type of the input. Valid type names are BOOL,
/// UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16,
/// FP32, FP64, and BYTES.
/// \param shape The shape of the input.
/// \param dim_count The number of dimensions of 'shape'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddInput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const TRITONSERVER_DataType datatype, const int64_t* shape,
    uint64_t dim_count);
/// Add a raw input to a request. The name recognized by the model, data type
/// and shape of the input will be deduced from model configuration.
/// This function must be called at most once on a request, with no other
/// inputs added, to ensure the deduction is accurate.
///
/// \param inference_request The request object.
/// \param name The name of the input. This name is only used to reference
/// the raw input in other Triton Server APIs. It is not associated with the
/// name used in the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAddRawInput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove an input from a request.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveInput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove all inputs from a request.
///
/// \param inference_request The request object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveAllInputs(
    TRITONSERVER_InferenceRequest* inference_request);
/// Assign a buffer of data to an input. The buffer will be appended
/// to any existing buffers for that input. The 'inference_request'
/// object takes ownership of the buffer and so the caller should not
/// modify or free the buffer until that ownership is released by
/// 'inference_request' being deleted or by the input being removed
/// from 'inference_request'.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param base The base address of the input data.
/// \param byte_size The size, in bytes, of the input data.
/// \param memory_type The memory type of the input data.
/// \param memory_type_id The memory type id of the input data.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAppendInputData(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id);
/// Assign a buffer of data to an input for execution on all model instances
/// with the specified host policy. The buffer will be appended to any existing
/// buffers for that input on all devices with this host policy. The
/// 'inference_request' object takes ownership of the buffer and so the caller
/// should not modify or free the buffer until that ownership is released by
/// 'inference_request' being deleted or by the input being removed from
/// 'inference_request'. If execution is scheduled on a device that does not
/// have an input buffer specified using this function, then the input buffer
/// specified with TRITONSERVER_InferenceRequestAppendInputData will be used,
/// so a non-host-policy-specific version of the data must be added using that
/// API.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param base The base address of the input data.
/// \param byte_size The size, in bytes, of the input data.
/// \param memory_type The memory type of the input data.
/// \param memory_type_id The memory type id of the input data.
/// \param host_policy_name All model instances executing with this host_policy
/// will use this input buffer for execution.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, const char* host_policy_name);
/// Assign a buffer of data to an input. The buffer will be appended
/// to any existing buffers for that input. The 'inference_request'
/// object takes ownership of the buffer and so the caller should not
/// modify or free the buffer until that ownership is released by
/// 'inference_request' being deleted or by the input being removed
/// from 'inference_request'.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param base The base address of the input data.
/// \param buffer_attributes The buffer attributes of the input.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const void* base, TRITONSERVER_BufferAttributes* buffer_attributes);
/// Clear all input data from an input, releasing ownership of the
/// buffer(s) that were appended to the input with
/// TRITONSERVER_InferenceRequestAppendInputData or
/// TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveAllInputData(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Add an output request to an inference request.
///
/// \param inference_request The request object.
/// \param name The name of the output.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAddRequestedOutput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove an output request from an inference request.
///
/// \param inference_request The request object.
/// \param name The name of the output.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveRequestedOutput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove all output requests from an inference request.
///
/// \param inference_request The request object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs(
    TRITONSERVER_InferenceRequest* inference_request);
/// Set the release callback for an inference request. The release
/// callback is called by Triton to return ownership of the request
/// object.
///
/// \param inference_request The request object.
/// \param request_release_fn The function called to return ownership
/// of the 'inference_request' object.
/// \param request_release_userp User-provided pointer that is
/// delivered to the 'request_release_fn' callback.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetReleaseCallback(
    TRITONSERVER_InferenceRequest* inference_request,
    TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn,
    void* request_release_userp);
/// Set the allocator and response callback for an inference
/// request. The allocator is used to allocate buffers for any output
/// tensors included in responses that are produced for this
/// request. The response callback is called to return response
/// objects representing responses produced for this request.
///
/// \param inference_request The request object.
/// \param response_allocator The TRITONSERVER_ResponseAllocator to use
/// to allocate buffers to hold inference results.
/// \param response_allocator_userp User-provided pointer that is
/// delivered to the response allocator's start and allocation functions.
/// \param response_fn The function called to deliver an inference
/// response for this request.
/// \param response_userp User-provided pointer that is delivered to
/// the 'response_fn' callback.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetResponseCallback(
    TRITONSERVER_InferenceRequest* inference_request,
    TRITONSERVER_ResponseAllocator* response_allocator,
    void* response_allocator_userp,
    TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
    void* response_userp);
/// TRITONSERVER_InferenceResponse
///
/// Object representing an inference response. The inference response
/// provides the meta-data and output tensor values calculated by the
/// inference.
///
/// Delete an inference response object.
///
/// \param inference_response The response object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseDelete(
    TRITONSERVER_InferenceResponse* inference_response);
/// Return the error status of an inference response. Return a
/// TRITONSERVER_Error object on failure, return nullptr on success.
/// The returned error object is owned by 'inference_response' and so
/// should not be deleted by the caller.
///
/// \param inference_response The response object.
/// \return a TRITONSERVER_Error indicating the success or failure
/// status of the response.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseError(
    TRITONSERVER_InferenceResponse* inference_response);
/// Get model used to produce a response. The caller does not own the
/// returned model name value and must not modify or delete it. The
/// lifetime of all returned values extends until 'inference_response'
/// is deleted.
///
/// \param inference_response The response object.
/// \param model_name Returns the name of the model.
/// \param model_version Returns the version of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseModel(
    TRITONSERVER_InferenceResponse* inference_response,
    const char** model_name, int64_t* model_version);
/// Get the ID of the request corresponding to a response. The caller
/// does not own the returned ID and must not modify or delete it. The
/// lifetime of all returned values extends until 'inference_response'
/// is deleted.
///
/// \param inference_response The response object.
/// \param request_id Returns the ID of the request corresponding to
/// this response.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseId(
    TRITONSERVER_InferenceResponse* inference_response,
    const char** request_id);
/// Get the number of parameters available in the response.
///
/// \param inference_response The response object.
/// \param count Returns the number of parameters.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseParameterCount(
    TRITONSERVER_InferenceResponse* inference_response, uint32_t* count);
/// Get all information about a parameter. The caller does not own any
/// of the returned values and must not modify or delete them. The
/// lifetime of all returned values extends until 'inference_response'
/// is deleted.
///
/// The 'vvalue' returns a void* pointer that must be cast
/// appropriately based on 'type'. For example:
///
/// void* vvalue;
/// TRITONSERVER_ParameterType type;
/// TRITONSERVER_InferenceResponseParameter(
/// response, index, &name, &type, &vvalue);
/// switch (type) {
/// case TRITONSERVER_PARAMETER_BOOL:
/// bool value = *(reinterpret_cast<bool*>(vvalue));
/// ...
/// case TRITONSERVER_PARAMETER_INT:
/// int64_t value = *(reinterpret_cast<int64_t*>(vvalue));
/// ...
/// case TRITONSERVER_PARAMETER_STRING:
/// const char* value = reinterpret_cast<const char*>(vvalue);
/// ...
///
/// \param inference_response The response object.
/// \param index The index of the parameter, must be 0 <= index <
/// count, where 'count' is the value returned by
/// TRITONSERVER_InferenceResponseParameterCount.
/// \param name Returns the name of the parameter.
/// \param type Returns the type of the parameter.
/// \param vvalue Returns a pointer to the parameter value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseParameter(
    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
    const char** name, TRITONSERVER_ParameterType* type, const void** vvalue);
/// Get the number of outputs available in the response.
///
/// \param inference_response The response object.
/// \param count Returns the number of output tensors.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseOutputCount(
    TRITONSERVER_InferenceResponse* inference_response, uint32_t* count);
/// Get all information about an output tensor. The tensor data is
/// returned as the base pointer to the data and the size, in bytes,
/// of the data. The caller does not own any of the returned values
/// and must not modify or delete them. The lifetime of all returned
/// values extends until 'inference_response' is deleted.
///
/// \param inference_response The response object.
/// \param index The index of the output tensor, must be 0 <= index <
/// count, where 'count' is the value returned by
/// TRITONSERVER_InferenceResponseOutputCount.
/// \param name Returns the name of the output.
/// \param datatype Returns the type of the output.
/// \param shape Returns the shape of the output.
/// \param dim_count Returns the number of dimensions of the returned
/// shape.
/// \param base Returns the tensor data for the output.
/// \param byte_size Returns the size, in bytes, of the data.
/// \param memory_type Returns the memory type of the data.
/// \param memory_type_id Returns the memory type id of the data.
/// \param userp The user-specified value associated with the buffer
/// in TRITONSERVER_ResponseAllocatorAllocFn_t.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutput(
    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
    const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint64_t* dim_count, const void** base, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
    void** userp);
/// Get a classification label associated with an output for a given
/// index. The caller does not own the returned label and must not
/// modify or delete it. The lifetime of the returned label extends
/// until 'inference_response' is deleted.
///
/// \param inference_response The response object.
/// \param index The index of the output tensor, must be 0 <= index <
/// count, where 'count' is the value returned by
/// TRITONSERVER_InferenceResponseOutputCount.
/// \param class_index The index of the class.
/// \param label Returns the label corresponding to 'class_index' or
/// nullptr if no label.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseOutputClassificationLabel(
    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
    const size_t class_index, const char** label);
/// TRITONSERVER_BufferAttributes
///
/// API to create, modify, or retrieve attributes associated with a buffer.
///
/// Create a new buffer attributes object. The caller takes ownership of
/// the TRITONSERVER_BufferAttributes object and must call
/// TRITONSERVER_BufferAttributesDelete to release the object.
///
/// \param buffer_attributes Returns the new buffer attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesNew(
    TRITONSERVER_BufferAttributes** buffer_attributes);
/// Delete a buffer attributes object.
///
/// \param buffer_attributes The buffer_attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesDelete(
    TRITONSERVER_BufferAttributes* buffer_attributes);
/// Set the memory type id field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type_id Memory type id to assign to the buffer attributes
/// object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetMemoryTypeId(
    TRITONSERVER_BufferAttributes* buffer_attributes, int64_t memory_type_id);
/// Set the memory type field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type Memory type to assign to the buffer attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetMemoryType(
    TRITONSERVER_BufferAttributes* buffer_attributes,
    TRITONSERVER_MemoryType memory_type);
/// Set the CudaIpcHandle field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param cuda_ipc_handle The CudaIpcHandle to assign to the buffer attributes
/// object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetCudaIpcHandle(
    TRITONSERVER_BufferAttributes* buffer_attributes, void* cuda_ipc_handle);
/// Set the byte size field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param byte_size Byte size to assign to the buffer attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetByteSize(
    TRITONSERVER_BufferAttributes* buffer_attributes, size_t byte_size);
/// Get the memory type id field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type_id Returns the memory type id associated with the buffer
/// attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesMemoryTypeId(
    TRITONSERVER_BufferAttributes* buffer_attributes, int64_t* memory_type_id);
/// Get the memory type field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type Returns the memory type associated with the buffer
/// attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesMemoryType(
    TRITONSERVER_BufferAttributes* buffer_attributes,
    TRITONSERVER_MemoryType* memory_type);
/// Get the CudaIpcHandle field of the buffer attributes object.
///
/// \param buffer_attributes The buffer attributes object.
/// \param cuda_ipc_handle Returns the CUDA IPC handle associated with the
/// buffer attributes object. If the CUDA IPC handle does not exist for the
/// buffer, nullptr will be returned.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesCudaIpcHandle(
    TRITONSERVER_BufferAttributes* buffer_attributes, void** cuda_ipc_handle);
/// Get the byte size field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param byte_size Returns the byte size associated with the buffer attributes
/// object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesByteSize(
    TRITONSERVER_BufferAttributes* buffer_attributes, size_t* byte_size);
/// TRITONSERVER_ServerOptions
///
/// Options to use when creating an inference server.
///
/// Model control modes
typedef enum tritonserver_modelcontrolmode_enum {
  TRITONSERVER_MODEL_CONTROL_NONE,
  TRITONSERVER_MODEL_CONTROL_POLL,
  TRITONSERVER_MODEL_CONTROL_EXPLICIT
} TRITONSERVER_ModelControlMode;
/// Rate limit modes
typedef enum tritonserver_ratelimitmode_enum {
  TRITONSERVER_RATE_LIMIT_OFF,
  TRITONSERVER_RATE_LIMIT_EXEC_COUNT
} TRITONSERVER_RateLimitMode;
/// Create a new server options object. The caller takes ownership of
/// the TRITONSERVER_ServerOptions object and must call
/// TRITONSERVER_ServerOptionsDelete to release the object.
///
/// \param options Returns the new server options object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsNew(
    TRITONSERVER_ServerOptions** options);
/// Delete a server options object.
///
/// \param options The server options object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsDelete(
    TRITONSERVER_ServerOptions* options);
/// Set the textual ID for the server in a server options. The ID is a
/// name that identifies the server.
///
/// \param options The server options object.
/// \param server_id The server identifier.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetServerId(
    TRITONSERVER_ServerOptions* options, const char* server_id);
/// Set the model repository path in a server options. The path must be
/// the full absolute path to the model repository. This function can be called
/// multiple times with different paths to set multiple model repositories.
/// Note that if a model is not unique across all model repositories
/// at any time, the model will not be available.
///
/// \param options The server options object.
/// \param model_repository_path The full path to the model repository.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelRepositoryPath(
    TRITONSERVER_ServerOptions* options, const char* model_repository_path);
/// Set the model control mode in a server options. For each mode the models
/// will be managed as the following:
///
/// TRITONSERVER_MODEL_CONTROL_NONE: the models in model repository will be
/// loaded on startup. After startup any changes to the model repository will
/// be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in
/// an error.
///
/// TRITONSERVER_MODEL_CONTROL_POLL: the models in model repository will be
/// loaded on startup. The model repository can be polled periodically using
/// TRITONSERVER_ServerPollModelRepository and the server will load, unload,
/// and update models according to changes in the model repository.
///
/// TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in model repository will
/// not be loaded on startup. The corresponding model control APIs must be
/// called to load / unload a model in the model repository.
///
/// \param options The server options object.
/// \param mode The mode to use for the model control.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelControlMode(
    TRITONSERVER_ServerOptions* options, TRITONSERVER_ModelControlMode mode);
/// Set the model to be loaded at startup in a server options. The model must be
/// present in one, and only one, of the specified model repositories.
/// This function can be called multiple times with different model names
/// to set multiple startup models.
/// Note that it only takes effect in TRITONSERVER_MODEL_CONTROL_EXPLICIT mode.
///
/// \param options The server options object.
/// \param model_name The name of the model to load on startup.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetStartupModel(
    TRITONSERVER_ServerOptions* options, const char* model_name);
/// Enable or disable strict model configuration handling in a server
/// options.
///
/// \param options The server options object.
/// \param strict True to enable strict model configuration handling,
/// false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetStrictModelConfig(
    TRITONSERVER_ServerOptions* options, bool strict);
/// Set the rate limit mode in a server options.
///
/// TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the
/// inference execution using the number of times each instance has got a
/// chance to run. The execution gets to run only when its resource
/// constraints are satisfied.
///
/// TRITONSERVER_RATE_LIMIT_OFF: The rate limiting is turned off and the
/// inference gets executed whenever an instance is available.
///
/// \param options The server options object.
/// \param mode The mode to use for the rate limiting. By default, execution
/// count is used to determine the priorities.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetRateLimiterMode(
    TRITONSERVER_ServerOptions* options, TRITONSERVER_RateLimitMode mode);
/// Add resource count for rate limiting.
///
/// \param options The server options object.
/// \param resource_name The name of the resource.
/// \param resource_count The count of the resource.
/// \param device The device identifier for the resource. A value of -1
/// indicates that the specified number of resources are available on every
/// device. The device value is ignored for a global resource. The server
/// will use the rate limiter configuration specified for instance groups
/// in model config to determine whether resource is global. In case of
/// conflicting resource type in different model configurations, server
/// will raise an appropriate error while loading model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsAddRateLimiterResource(
    TRITONSERVER_ServerOptions* options, const char* resource_name,
    const size_t resource_count, const int device);
/// Set the total pinned memory byte size that the server can allocate
/// in a server options. The pinned memory pool will be shared across
/// Triton itself and the backends that use
/// TRITONBACKEND_MemoryManager to allocate memory.
///
/// \param options The server options object.
/// \param size The pinned memory pool byte size.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(
    TRITONSERVER_ServerOptions* options, uint64_t size);
/// Set the total CUDA memory byte size that the server can allocate
/// on a given GPU device in a server options. The CUDA memory pool
/// will be shared across Triton itself and the backends that use
/// TRITONBACKEND_MemoryManager to allocate memory.
///
/// \param options The server options object.
/// \param gpu_device The GPU device to allocate the memory pool.
/// \param size The CUDA memory pool byte size.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize(
    TRITONSERVER_ServerOptions* options, int gpu_device, uint64_t size);
/// Set the total response cache byte size that the server can allocate in CPU
/// memory. The response cache will be shared across all inference requests and
/// across all models.
///
/// \param options The server options object.
/// \param size The total response cache byte size.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetResponseCacheByteSize(
    TRITONSERVER_ServerOptions* options, uint64_t size);
/// Set the minimum supported CUDA compute capability in a server
/// options.
///
/// \param options The server options object.
/// \param cc The minimum CUDA compute capability.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
    TRITONSERVER_ServerOptions* options, double cc);
/// Enable or disable exit-on-error in a server options.
///
/// \param options The server options object.
/// \param exit True to enable exiting on initialization error, false
/// to continue.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetExitOnError(
    TRITONSERVER_ServerOptions* options, bool exit);
/// Enable or disable strict readiness handling in a server options.
///
/// \param options The server options object.
/// \param strict True to enable strict readiness handling, false to
/// disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetStrictReadiness(
    TRITONSERVER_ServerOptions* options, bool strict);
/// Set the exit timeout, in seconds, for the server in a server
/// options.
///
/// \param options The server options object.
/// \param timeout The exit timeout, in seconds.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetExitTimeout(
    TRITONSERVER_ServerOptions* options, unsigned int timeout);
/// Set the number of threads used in the buffer manager in a server options.
///
/// \param options The server options object.
/// \param thread_count The number of threads.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetBufferManagerThreadCount(
    TRITONSERVER_ServerOptions* options, unsigned int thread_count);
/// Set the number of threads to concurrently load models in a server options.
///
/// \param options The server options object.
/// \param thread_count The number of threads.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelLoadThreadCount(
    TRITONSERVER_ServerOptions* options, unsigned int thread_count);
/// Provide a log output file.
///
/// \param options The server options object.
/// \param file A string defining the file where the log output will be saved.
/// An empty string for the file name will cause Triton to direct logging
/// output to the console.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFile(
    TRITONSERVER_ServerOptions* options, const char* file);
/// Enable or disable info level logging.
///
/// \param options The server options object.
/// \param log True to enable info logging, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogInfo(
    TRITONSERVER_ServerOptions* options, bool log);
/// Enable or disable warning level logging.
///
/// \param options The server options object.
/// \param log True to enable warning logging, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogWarn(
    TRITONSERVER_ServerOptions* options, bool log);
/// Enable or disable error level logging.
///
/// \param options The server options object.
/// \param log True to enable error logging, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetLogError(
    TRITONSERVER_ServerOptions* options, bool log);
/// Set the logging format.
///
/// \param options The server options object.
/// \param format The logging format.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetLogFormat(
    TRITONSERVER_ServerOptions* options, const TRITONSERVER_LogFormat format);
/// Set verbose logging level. Level zero disables verbose logging.
///
/// \param options The server options object.
/// \param level The verbose logging level.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetLogVerbose(
    TRITONSERVER_ServerOptions* options, int level);
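A minimal sketch of how the logging options above are typically combined when building server options. TRITONSERVER_ServerOptionsNew is assumed to be declared earlier in this header, error returns are ignored for brevity, and the log file path is hypothetical.
  // Hedged usage sketch: info/warn/error to a file, verbose level 1.
  TRITONSERVER_ServerOptions* opts = nullptr;
  TRITONSERVER_ServerOptionsNew(&opts);
  TRITONSERVER_ServerOptionsSetLogFile(opts, "/tmp/triton.log");  // hypothetical path
  TRITONSERVER_ServerOptionsSetLogInfo(opts, true);
  TRITONSERVER_ServerOptionsSetLogWarn(opts, true);
  TRITONSERVER_ServerOptionsSetLogError(opts, true);
  TRITONSERVER_ServerOptionsSetLogVerbose(opts, 1);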
/// Enable or disable metrics collection in a server options.
///
/// \param options The server options object.
/// \param metrics True to enable metrics, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetMetrics(
    TRITONSERVER_ServerOptions* options, bool metrics);
/// Enable or disable GPU metrics collection in a server options. GPU
/// metrics are collected if both this option and
/// TRITONSERVER_ServerOptionsSetMetrics are true.
///
/// \param options The server options object.
/// \param gpu_metrics True to enable GPU metrics, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetGpuMetrics(
    TRITONSERVER_ServerOptions* options, bool gpu_metrics);
/// Enable or disable CPU metrics collection in a server options. CPU
/// metrics are collected if both this option and
/// TRITONSERVER_ServerOptionsSetMetrics are true.
///
/// \param options The server options object.
/// \param cpu_metrics True to enable CPU metrics, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetCpuMetrics(
    TRITONSERVER_ServerOptions* options, bool cpu_metrics);
/// Set the interval for metrics collection in a server options.
/// This is 2000 milliseconds by default.
///
/// \param options The server options object.
/// \param metrics_interval_ms The time interval in ms between
/// successive metrics updates.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetMetricsInterval(
    TRITONSERVER_ServerOptions* options, uint64_t metrics_interval_ms);
/// Set the directory containing backend shared libraries. This
/// directory is searched last after the version and model directory
/// in the model repository when looking for the backend shared
/// library for a model. If the backend is named 'be' the directory
/// searched is 'backend_dir'/be/libtriton_be.so.
///
/// \param options The server options object.
/// \param backend_dir The full path of the backend directory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetBackendDirectory(
    TRITONSERVER_ServerOptions* options, const char* backend_dir);
/// Set the directory containing repository agent shared libraries. This
/// directory is searched when looking for the repository agent shared
/// library for a model. If the backend is named 'ra' the directory
/// searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so.
///
/// \param options The server options object.
/// \param repoagent_dir The full path of the repository agent directory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
    TRITONSERVER_ServerOptions* options, const char* repoagent_dir);
/// Specify the limit on memory usage as a fraction on the device identified by
/// 'kind' and 'device_id'. If model loading on the device is requested and the
/// current memory usage exceeds the limit, the load will be rejected. If not
/// specified, the limit will not be set.
///
/// Currently supports TRITONSERVER_INSTANCEGROUPKIND_GPU.
///
/// \param options The server options object.
/// \param kind The kind of the device.
/// \param device_id The id of the device.
/// \param fraction The limit on memory usage as a fraction
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit(
    TRITONSERVER_ServerOptions* options,
    const TRITONSERVER_InstanceGroupKind kind, const int device_id,
    const double fraction);
/// Set a configuration setting for a named backend in a server
/// options.
///
/// \param options The server options object.
/// \param backend_name The name of the backend.
/// \param setting The name of the setting.
/// \param value The setting value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetBackendConfig(
    TRITONSERVER_ServerOptions* options, const char* backend_name,
    const char* setting, const char* value);
/// Set a host policy setting for a given policy name in a server options.
///
/// \param options The server options object.
/// \param policy_name The name of the policy.
/// \param setting The name of the setting.
/// \param value The setting value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetHostPolicy(
    TRITONSERVER_ServerOptions* options, const char* policy_name,
    const char* setting, const char* value);
/// TRITONSERVER_Server
///
/// An inference server.
///
/// Model batch flags. The enum values must be power-of-2 values.
typedef enum tritonserver_batchflag_enum {
  TRITONSERVER_BATCH_UNKNOWN = 1,
  TRITONSERVER_BATCH_FIRST_DIM = 2
} TRITONSERVER_ModelBatchFlag;
/// Model index flags. The enum values must be power-of-2 values.
typedef enum tritonserver_modelindexflag_enum {
  TRITONSERVER_INDEX_FLAG_READY = 1
} TRITONSERVER_ModelIndexFlag;
/// Model transaction policy flags. The enum values must be
/// power-of-2 values.
typedef enum tritonserver_txn_property_flag_enum {
  TRITONSERVER_TXN_ONE_TO_ONE = 1,
  TRITONSERVER_TXN_DECOUPLED = 2
} TRITONSERVER_ModelTxnPropertyFlag;
/// Create a new server object. The caller takes ownership of the
/// TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete
/// to release the object.
///
/// \param server Returns the new inference server object.
/// \param options The inference server options object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerNew(
    TRITONSERVER_Server** server, TRITONSERVER_ServerOptions* options);
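A minimal server lifecycle sketch. TRITONSERVER_ServerOptionsNew, TRITONSERVER_ServerOptionsSetModelRepositoryPath, TRITONSERVER_ServerOptionsDelete and TRITONSERVER_ErrorDelete are assumed to be declared earlier in this header, and the repository path is hypothetical.
  // Hedged usage sketch: create options, create the server, clean up.
  TRITONSERVER_ServerOptions* opts = nullptr;
  TRITONSERVER_ServerOptionsNew(&opts);
  TRITONSERVER_ServerOptionsSetModelRepositoryPath(opts, "/models");  // hypothetical path
  TRITONSERVER_Server* server = nullptr;
  TRITONSERVER_Error* err = TRITONSERVER_ServerNew(&server, opts);
  TRITONSERVER_ServerOptionsDelete(opts);  // options may be released once the server exists
  if (err != nullptr) {
    // inspect / log the error, then release it
    TRITONSERVER_ErrorDelete(err);
  } else {
    // ... use the server ...
    TRITONSERVER_ServerDelete(server);  // stops the server first if still running
  }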
/// Delete a server object. If server is not already stopped it is
/// stopped before being deleted.
///
/// \param server The inference server object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerDelete(TRITONSERVER_Server* server);
/// Stop a server object. A server can't be restarted once it is
/// stopped.
///
/// \param server The inference server object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerStop(TRITONSERVER_Server* server);
/// Register a new model repository. Not available in polling mode.
///
/// \param server The inference server object.
/// \param repository_path The full path to the model repository.
/// \param name_mapping List of name_mapping parameters. Each mapping has
/// the model directory name as its key, overridden model name as its value.
/// \param mapping_count Number of mappings provided.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerRegisterModelRepository(
    TRITONSERVER_Server* server, const char* repository_path,
    const TRITONSERVER_Parameter** name_mapping, const uint32_t mapping_count);
/// Unregister a model repository. Not available in polling mode.
///
/// \param server The inference server object.
/// \param repository_path The full path to the model repository.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnregisterModelRepository(
    TRITONSERVER_Server* server, const char* repository_path);
/// Check the model repository for changes and update server state
/// based on those changes.
///
/// \param server The inference server object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server* server);
/// Is the server live?
///
/// \param server The inference server object.
/// \param live Returns true if server is live, false otherwise.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerIsLive(TRITONSERVER_Server* server, bool* live);
/// Is the server ready?
///
/// \param server The inference server object.
/// \param ready Returns true if server is ready, false otherwise.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerIsReady(TRITONSERVER_Server* server, bool* ready);
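A short health-check sketch using the two calls above, assuming 'server' was created as in the earlier lifecycle example; error returns are ignored for brevity.
  bool live = false;
  bool ready = false;
  TRITONSERVER_ServerIsLive(server, &live);
  TRITONSERVER_ServerIsReady(server, &ready);
  if (!live || !ready) {
    // wait and poll again, or abort startup
  }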
/// Is the model ready?
///
/// \param server The inference server object.
/// \param model_name The name of the model to get readiness for.
/// \param model_version The version of the model to get readiness
/// for. If -1 then the server will choose a version based on the
/// model's policy.
/// \param ready Returns true if the model is ready, false otherwise.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelIsReady(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, bool* ready);
/// Get the batch properties of the model. The properties are
/// communicated by a flags value and an (optional) object returned by
/// 'voidp'.
///
/// - TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the
/// batching properties of the model. This means that the model
/// does not support batching in any way that is useable by
/// Triton. The returned 'voidp' value is nullptr.
///
/// - TRITONSERVER_BATCH_FIRST_DIM: The model supports batching
/// along the first dimension of every input and output
/// tensor. Triton schedulers that perform batching can
/// automatically batch inference requests along this dimension.
/// The returned 'voidp' value is nullptr.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param flags Returns flags indicating the batch properties of the
/// model.
/// \param voidp If non-nullptr, returns a pointer specific to the
/// 'flags' value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelBatchProperties(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, uint32_t* flags, void** voidp);
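A sketch of testing the returned flags value; the model name is hypothetical and the error return is ignored for brevity.
  uint32_t flags = 0;
  TRITONSERVER_ServerModelBatchProperties(
      server, "my_model", -1 /* version chosen by policy */, &flags, nullptr);
  if (flags & TRITONSERVER_BATCH_FIRST_DIM) {
    // requests for this model can be batched along the first dimension
  }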
/// Get the transaction policy of the model. The policy is
/// communicated by a flags value.
///
/// - TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly
/// one response per request.
///
/// - TRITONSERVER_TXN_DECOUPLED: The model may generate zero
/// to many responses per request.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param txn_flags Returns flags indicating the transaction policy of the
/// model.
/// \param voidp If non-nullptr, returns a pointer specific to the 'flags' value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelTransactionProperties(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, uint32_t* txn_flags, void** voidp);
/// Get the metadata of the server as a TRITONSERVER_Message object.
/// The caller takes ownership of the message object and must call
/// TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param server_metadata Returns the server metadata message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerMetadata(
    TRITONSERVER_Server* server, TRITONSERVER_Message** server_metadata);
/// Get the metadata of a model as a TRITONSERVER_Message
/// object. The caller takes ownership of the message object and must
/// call TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model.
/// If -1 then the server will choose a version based on the model's
/// policy.
/// \param model_metadata Returns the model metadata message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelMetadata(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, TRITONSERVER_Message** model_metadata);
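A sketch of retrieving model metadata and reading it as JSON. TRITONSERVER_MessageSerializeToJson and TRITONSERVER_MessageDelete are assumed to be declared earlier in this header; the model name is hypothetical.
  TRITONSERVER_Message* metadata = nullptr;
  TRITONSERVER_Error* err =
      TRITONSERVER_ServerModelMetadata(server, "my_model", -1, &metadata);
  if (err == nullptr) {
    const char* base = nullptr;
    size_t byte_size = 0;
    TRITONSERVER_MessageSerializeToJson(metadata, &base, &byte_size);
    // 'base' points at a JSON string of 'byte_size' bytes, valid until the
    // message object is deleted.
    TRITONSERVER_MessageDelete(metadata);
  }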
/// Get the statistics of a model as a TRITONSERVER_Message
/// object. The caller takes ownership of the object and must call
/// TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// If empty, then statistics for all available models will be returned,
/// and the server will choose a version based on those models' policies.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param model_stats Returns the model statistics message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelStatistics(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, TRITONSERVER_Message** model_stats);
/// Get the configuration of a model as a TRITONSERVER_Message object.
/// The caller takes ownership of the message object and must call
/// TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned. Currently only version 1 is supported.
/// \param model_config Returns the model config message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelConfig(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, const uint32_t config_version,
    TRITONSERVER_Message** model_config);
/// Get the index of all unique models in the model repositories as a
/// TRITONSERVER_Message object. The caller takes ownership of the
/// message object and must call TRITONSERVER_MessageDelete to release
/// the object.
///
/// If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models
/// that are loaded into the server and ready for inferencing are
/// returned.
///
/// \param server The inference server object.
/// \param flags TRITONSERVER_ModelIndexFlag flags that control how to
/// collect the index.
/// \param model_index Return the model index message that holds the
/// index of all models contained in the server's model repository(s).
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelIndex(
    TRITONSERVER_Server* server, uint32_t flags,
    TRITONSERVER_Message** model_index);
/// Load the requested model or reload the model if it is already
/// loaded. The function does not return until the model is loaded or
/// fails to load. Returned error indicates if model loaded
/// successfully or not.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerLoadModel(
    TRITONSERVER_Server* server, const char* model_name);
/// Load the requested model or reload the model if it is already
/// loaded, with load parameters provided. The function does not return until
/// the model is loaded or fails to load. Returned error indicates if model
/// loaded successfully or not.
/// Currently the below parameter names are recognized:
/// - "config" : string parameter that contains a JSON representation of the
/// model configuration. This config will be used for loading the model instead
/// of the one in the model directory.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param parameters The array of load parameters.
/// \param parameter_count The number of parameters.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerLoadModelWithParameters(
    TRITONSERVER_Server* server, const char* model_name,
    const TRITONSERVER_Parameter** parameters, const uint64_t parameter_count);
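A sketch of loading a model with a "config" override. TRITONSERVER_ParameterNew, TRITONSERVER_ParameterDelete and TRITONSERVER_PARAMETER_STRING are assumed to be declared earlier in this header; the model name and the JSON override are hypothetical, and the error return is left unhandled for brevity.
  const char* config_json = "{\"max_batch_size\": 8}";  // hypothetical override
  TRITONSERVER_Parameter* config_param = TRITONSERVER_ParameterNew(
      "config", TRITONSERVER_PARAMETER_STRING, config_json);
  const TRITONSERVER_Parameter* params[] = {config_param};
  TRITONSERVER_Error* err = TRITONSERVER_ServerLoadModelWithParameters(
      server, "my_model", params, 1 /* parameter_count */);
  TRITONSERVER_ParameterDelete(config_param);  // safe once the call returns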
/// Unload the requested model. Unloading a model that is not loaded
/// on the server has no effect and a success code will be returned.
/// The function does not wait for the requested model to be fully unloaded
/// and a success code will be returned.
/// Returned error indicates if model unloaded successfully or not.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnloadModel(
    TRITONSERVER_Server* server, const char* model_name);
/// Unload the requested model, and also unload any dependent model that
/// was loaded along with the requested model (for example, the models composing
/// an ensemble). Unloading a model that is not loaded
/// on the server has no effect and a success code will be returned.
/// The function does not wait for the requested model and all dependent
/// models to be fully unloaded and a success code will be returned.
/// Returned error indicates if model unloaded successfully or not.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnloadModelAndDependents(
    TRITONSERVER_Server* server, const char* model_name);
/// Get the current metrics for the server. The caller takes ownership
/// of the metrics object and must call TRITONSERVER_MetricsDelete to
/// release the object.
///
/// \param server The inference server object.
/// \param metrics Returns the metrics.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerMetrics(
    TRITONSERVER_Server* server, TRITONSERVER_Metrics** metrics);
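A sketch of exporting the current metrics in Prometheus text format. TRITONSERVER_MetricsFormatted, TRITONSERVER_METRIC_PROMETHEUS and TRITONSERVER_MetricsDelete are assumed to be declared earlier in this header.
  TRITONSERVER_Metrics* metrics = nullptr;
  TRITONSERVER_Error* err = TRITONSERVER_ServerMetrics(server, &metrics);
  if (err == nullptr) {
    const char* base = nullptr;
    size_t byte_size = 0;
    TRITONSERVER_MetricsFormatted(
        metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size);
    // 'base' holds the Prometheus exposition text, valid until the metrics
    // object is deleted.
    TRITONSERVER_MetricsDelete(metrics);
  }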
/// Perform inference using the meta-data and inputs supplied by the
/// 'inference_request'. If the function returns success, then the
/// caller releases ownership of 'inference_request' and must not
/// access it in any way after this call, until ownership is returned
/// via the 'request_release_fn' callback registered in the request
/// object with TRITONSERVER_InferenceRequestSetReleaseCallback.
///
/// The function unconditionally takes ownership of 'trace' and so the
/// caller must not access it in any way after this call (except in
/// the trace activity callbacks) until ownership is returned via the
/// trace's release_fn callback.
///
/// Responses produced for this request are returned using the
/// allocator and callback registered with the request by
/// TRITONSERVER_InferenceRequestSetResponseCallback.
///
/// \param server The inference server object.
/// \param inference_request The request object.
/// \param trace The trace object for this request, or nullptr if no
/// tracing.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerInferAsync(
    TRITONSERVER_Server* server,
    TRITONSERVER_InferenceRequest* inference_request,
    TRITONSERVER_InferenceTrace* trace);
/// TRITONSERVER_MetricKind
///
/// Types of metrics recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_metrickind_enum {
  TRITONSERVER_METRIC_KIND_COUNTER,
  TRITONSERVER_METRIC_KIND_GAUGE
} TRITONSERVER_MetricKind;
/// Create a new metric family object. The caller takes ownership of the
/// TRITONSERVER_MetricFamily object and must call
/// TRITONSERVER_MetricFamilyDelete to release the object.
///
/// \param family Returns the new metric family object.
/// \param kind The type of metric family to create.
/// \param name The name of the metric family seen when calling the metrics
/// endpoint.
/// \param description The description of the metric family seen when
/// calling the metrics endpoint.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricFamilyNew(
    TRITONSERVER_MetricFamily** family, const TRITONSERVER_MetricKind kind,
    const char* name, const char* description);
/// Delete a metric family object.
/// A TRITONSERVER_MetricFamily* object should be deleted AFTER its
/// corresponding TRITONSERVER_Metric* objects have been deleted.
/// Attempting to delete a family before its metrics will return an error.
///
/// \param family The metric family object to delete.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricFamilyDelete(TRITONSERVER_MetricFamily* family);
/// Create a new metric object. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
/// responsible for ownership of the labels passed in. Each label can be deleted
/// immediately after creating the metric with TRITONSERVER_ParameterDelete
/// if not re-using the labels.
///
/// \param metric Returns the new metric object.
/// \param family The metric family to add this new metric to.
/// \param labels The array of labels to associate with this new metric.
/// \param label_count The number of labels.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricNew(
    TRITONSERVER_Metric** metric, TRITONSERVER_MetricFamily* family,
    const TRITONSERVER_Parameter** labels, const uint64_t label_count);
/// Delete a metric object.
/// All TRITONSERVER_Metric* objects should be deleted BEFORE their
/// corresponding TRITONSERVER_MetricFamily* objects have been deleted.
/// If a family is deleted before its metrics, an error will be returned.
///
/// \param metric The metric object to delete.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricDelete(TRITONSERVER_Metric* metric);
/// Get the current value of a metric object.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_COUNTER
/// and TRITONSERVER_METRIC_KIND_GAUGE, and returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
///
/// \param metric The metric object to query.
/// \param value Returns the current value of the metric object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricValue(TRITONSERVER_Metric* metric, double* value);
/// Increment the current value of metric by value.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE for any value,
/// and TRITONSERVER_METRIC_KIND_COUNTER for non-negative values. Returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind
/// and TRITONSERVER_ERROR_INVALID_ARG for negative values on a
/// TRITONSERVER_METRIC_KIND_COUNTER metric.
///
/// \param metric The metric object to update.
/// \param value The amount to increment the metric's value by.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricIncrement(TRITONSERVER_Metric* metric, double value);
/// Set the current value of metric to value.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE and returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
///
/// \param metric The metric object to update.
/// \param value The amount to set metric's value to.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricSet(TRITONSERVER_Metric* metric, double value);
/// Get the TRITONSERVER_MetricKind of metric and its corresponding family.
///
/// \param metric The metric object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of metric.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_GetMetricKind(
    TRITONSERVER_Metric* metric, TRITONSERVER_MetricKind* kind);
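A sketch tying the custom-metric calls above together: create a counter family and one unlabeled metric, increment it, then delete the metric before its family as the documentation requires. The family name and description are hypothetical and error returns are ignored for brevity.
  TRITONSERVER_MetricFamily* family = nullptr;
  TRITONSERVER_MetricFamilyNew(
      &family, TRITONSERVER_METRIC_KIND_COUNTER, "custom_requests_total",
      "Total custom requests seen");  // hypothetical name and description
  TRITONSERVER_Metric* metric = nullptr;
  TRITONSERVER_MetricNew(&metric, family, nullptr /* no labels */, 0);
  TRITONSERVER_MetricIncrement(metric, 1.0);
  // Metrics must be deleted before their family.
  TRITONSERVER_MetricDelete(metric);
  TRITONSERVER_MetricFamilyDelete(family);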
#ifdef __cplusplus
}
#endif
3rdparty/core-r22.12/src/backend_config.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_config.h"
#include "status.h"
#include "triton/common/logging.h"
#include "triton/common/model_config.h"
namespace triton { namespace core {

namespace {

Status
GetTFSpecializedBackendName(
    const triton::common::BackendCmdlineConfigMap& config_map,
    std::string* specialized_name)
{
  std::string tf_version_str = "2";
  const auto& itr = config_map.find("tensorflow");
  if (itr != config_map.end()) {
    if (BackendConfiguration(itr->second, "version", &tf_version_str).IsOk()) {
      if ((tf_version_str != "1") && (tf_version_str != "2")) {
        return Status(
            Status::Code::INVALID_ARG,
            "unexpected TensorFlow library version '" + tf_version_str +
                "', expects 1 or 2.");
      }
    }
  }

  *specialized_name += tf_version_str;

  return Status::Success;
}

}  // namespace

Status
BackendConfiguration(
    const triton::common::BackendCmdlineConfig& config, const std::string& key,
    std::string* val)
{
  for (const auto& pr : config) {
    if (pr.first == key) {
      *val = pr.second;
      return Status::Success;
    }
  }
  return Status(
      Status::Code::INTERNAL,
      std::string("unable to find common backend configuration for '") + key +
          "'");
}

Status
BackendConfigurationParseStringToDouble(const std::string& str, double* val)
{
  try {
    *val = std::stod(str);
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        "unable to parse common backend configuration as double");
  }
  return Status::Success;
}

Status
BackendConfigurationParseStringToBool(const std::string& str, bool* val)
{
  try {
    std::string lowercase_str{str};
    std::transform(
        lowercase_str.begin(), lowercase_str.end(), lowercase_str.begin(),
        [](unsigned char c) { return std::tolower(c); });
    *val = (lowercase_str == "true");
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        "unable to parse common backend configuration as bool");
  }
  return Status::Success;
}

Status
BackendConfigurationGlobalBackendsDirectory(
    const triton::common::BackendCmdlineConfigMap& config_map,
    std::string* dir)
{
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL,
        "unable to find global backends directory configuration");
  }
  RETURN_IF_ERROR(BackendConfiguration(itr->second, "backend-directory", dir));
  return Status::Success;
}

Status
BackendConfigurationMinComputeCapability(
    const triton::common::BackendCmdlineConfigMap& config_map, double* mcc)
{
#ifdef TRITON_ENABLE_GPU
  *mcc = TRITON_MIN_COMPUTE_CAPABILITY;
#else
  *mcc = 0;
#endif  // TRITON_ENABLE_GPU
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL, "unable to find common backend configuration");
  }
  std::string min_compute_capability_str;
  RETURN_IF_ERROR(BackendConfiguration(
      itr->second, "min-compute-capability", &min_compute_capability_str));
  RETURN_IF_ERROR(BackendConfigurationParseStringToDouble(
      min_compute_capability_str, mcc));
  return Status::Success;
}

Status
BackendConfigurationAutoCompleteConfig(
    const triton::common::BackendCmdlineConfigMap& config_map, bool* acc)
{
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL, "unable to find auto-complete configuration");
  }
  std::string auto_complete_config_str;
  RETURN_IF_ERROR(BackendConfiguration(
      itr->second, "auto-complete-config", &auto_complete_config_str));
  RETURN_IF_ERROR(BackendConfigurationParseStringToBool(
      auto_complete_config_str, acc));
  return Status::Success;
}

Status
BackendConfigurationSpecializeBackendName(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const std::string& backend_name, std::string* specialized_name)
{
  *specialized_name = backend_name;
  if (backend_name == "tensorflow") {
    RETURN_IF_ERROR(GetTFSpecializedBackendName(config_map, specialized_name));
  }
  return Status::Success;
}

Status
BackendConfigurationBackendLibraryName(
    const std::string& backend_name, std::string* libname)
{
#ifdef _WIN32
  *libname = "triton_" + backend_name + ".dll";
#else
  *libname = "libtriton_" + backend_name + ".so";
#endif
  return Status::Success;
}

Status
BackendConfigurationModelLoadGpuFraction(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const int device_id, double* memory_limit)
{
  *memory_limit = 1.0;
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL,
        "unable to find global backends directory configuration");
  }
  static std::string key_prefix = "model-load-gpu-limit-device-";
  std::string memory_limit_str;
  auto status = BackendConfiguration(
      itr->second, key_prefix + std::to_string(device_id), &memory_limit_str);
  // Allow missing key, default to 1.0 (no limit) if the limit is not specified
  if (status.IsOk()) {
    RETURN_IF_ERROR(BackendConfigurationParseStringToDouble(
        memory_limit_str, memory_limit));
  }
  return Status::Success;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_config.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "status.h"
#include "triton/common/model_config.h"
namespace triton { namespace core {

/// Get a key's string value from a backend configuration.
Status BackendConfiguration(
    const triton::common::BackendCmdlineConfig& config, const std::string& key,
    std::string* val);

/// Convert a backend configuration string value into a double.
Status BackendConfigurationParseStringToDouble(
    const std::string& str, double* val);

/// Convert a backend configuration string value into a bool.
Status BackendConfigurationParseStringToBool(const std::string& str, bool* val);

/// Get the global backends directory from the backend configuration.
Status BackendConfigurationGlobalBackendsDirectory(
    const triton::common::BackendCmdlineConfigMap& config_map,
    std::string* dir);

/// Get the minimum compute capability from the backend configuration.
Status BackendConfigurationMinComputeCapability(
    const triton::common::BackendCmdlineConfigMap& config_map, double* mcc);

/// Get the model configuration auto-complete setting from the backend
/// configuration.
Status BackendConfigurationAutoCompleteConfig(
    const triton::common::BackendCmdlineConfigMap& config_map, bool* acc);

/// Convert a backend name to the specialized version of that name
/// based on the backend configuration. For example, "tensorflow" will
/// convert to either "tensorflow1" or "tensorflow2" depending on how
/// tritonserver is run.
Status BackendConfigurationSpecializeBackendName(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const std::string& backend_name, std::string* specialized_name);

/// Return the shared library name for a backend.
Status BackendConfigurationBackendLibraryName(
    const std::string& backend_name, std::string* libname);

/// Get GPU memory limit fraction for model loading
/// from the backend configuration.
Status BackendConfigurationModelLoadGpuFraction(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const int device_id, double* memory_limit);

}}  // namespace triton::core
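A hedged sketch of how the helpers declared above compose when reading the unnamed "common" backend configuration; ReadCommonSettings is a hypothetical caller, config_map comes from the command line, and RETURN_IF_ERROR is assumed from status.h.
  // Hypothetical caller: pull the common settings out of a config map.
  triton::core::Status
  ReadCommonSettings(const triton::common::BackendCmdlineConfigMap& config_map)
  {
    std::string dir;
    RETURN_IF_ERROR(triton::core::BackendConfigurationGlobalBackendsDirectory(
        config_map, &dir));
    double mcc = 0.0;
    RETURN_IF_ERROR(triton::core::BackendConfigurationMinComputeCapability(
        config_map, &mcc));
    bool auto_complete = false;
    RETURN_IF_ERROR(triton::core::BackendConfigurationAutoCompleteConfig(
        config_map, &auto_complete));
    return triton::core::Status::Success;
  }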
3rdparty/core-r22.12/src/backend_manager.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_manager.h"
#include "backend_memory_manager.h"
#include "server_message.h"
#include "shared_library.h"
#include "triton/common/logging.h"
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

//
// TritonBackend
//
Status
TritonBackend::Create(
    const std::string& name, const std::string& dir,
    const std::string& libpath,
    const triton::common::BackendCmdlineConfig& backend_cmdline_config,
    std::shared_ptr<TritonBackend>* backend)
{
  // Create the JSON representation of the backend configuration.
  triton::common::TritonJson::Value backend_config_json(
      triton::common::TritonJson::ValueType::OBJECT);
  if (!backend_cmdline_config.empty()) {
    triton::common::TritonJson::Value cmdline_json(
        backend_config_json, triton::common::TritonJson::ValueType::OBJECT);
    for (const auto& pr : backend_cmdline_config) {
      RETURN_IF_ERROR(cmdline_json.AddString(pr.first.c_str(), pr.second));
    }

    RETURN_IF_ERROR(
        backend_config_json.Add("cmdline", std::move(cmdline_json)));
  }

  TritonServerMessage backend_config(backend_config_json);

  auto local_backend = std::shared_ptr<TritonBackend>(
      new TritonBackend(name, dir, libpath, backend_config));

  // Load the library and initialize all the entrypoints
  RETURN_IF_ERROR(local_backend->LoadBackendLibrary());

  // Backend initialization is optional... The TRITONBACKEND_Backend
  // object is this TritonBackend object. We must set the shared
  // library path to point to the backend directory in case the
  // backend library attempts to load additional shared libraries.
  if (local_backend->backend_init_fn_ != nullptr) {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));
    RETURN_IF_ERROR(slib->SetLibraryDirectory(local_backend->dir_));

    TRITONSERVER_Error* err = local_backend->backend_init_fn_(
        reinterpret_cast<TRITONBACKEND_Backend*>(local_backend.get()));

    RETURN_IF_ERROR(slib->ResetLibraryDirectory());
    RETURN_IF_TRITONSERVER_ERROR(err);
  }

  local_backend->UpdateAttributes();

  *backend = std::move(local_backend);
  return Status::Success;
}

Status
TritonBackend::UpdateAttributes()
{
  if (backend_attri_fn_ == nullptr) {
    return Status::Success;
  }

  // Create an Attribute object for the backend to fill, note that it copies
  // some fields from 'attributes_' while the others use default value. This
  // is an ad hoc way to determine whether the attribute is set by the backend
  // and keep / update current value.
  Attribute latest;
  latest.exec_policy_ = attributes_.exec_policy_;
  RETURN_IF_TRITONSERVER_ERROR(backend_attri_fn_(
      reinterpret_cast<TRITONBACKEND_Backend*>(this),
      reinterpret_cast<TRITONBACKEND_BackendAttribute*>(&latest)));

  // Update attributes that were set
  attributes_.exec_policy_ = latest.exec_policy_;
  if (!latest.preferred_groups_.empty()) {
    attributes_.preferred_groups_ = latest.preferred_groups_;
  }
  return Status::Success;
}

TritonBackend::TritonBackend(
    const std::string& name, const std::string& dir,
    const std::string& libpath, const TritonServerMessage& backend_config)
    : name_(name), dir_(dir), libpath_(libpath),
      backend_config_(backend_config), state_(nullptr)
{
  ClearHandles();
}

TritonBackend::~TritonBackend()
{
  LOG_VERBOSE(1) << "unloading backend '" << name_ << "'";

  // Backend finalization is optional... The TRITONBACKEND_Backend
  // object is this TritonBackend object.
  if (backend_fini_fn_ != nullptr) {
    LOG_TRITONSERVER_ERROR(
        backend_fini_fn_(reinterpret_cast<TRITONBACKEND_Backend*>(this)),
        "failed finalizing backend");
  }

  ClearHandles();
}

void
TritonBackend::ClearHandles()
{
  dlhandle_ = nullptr;
  backend_init_fn_ = nullptr;
  backend_fini_fn_ = nullptr;
  backend_attri_fn_ = nullptr;
  model_init_fn_ = nullptr;
  model_fini_fn_ = nullptr;
  inst_init_fn_ = nullptr;
  inst_fini_fn_ = nullptr;
  inst_exec_fn_ = nullptr;
}

Status
TritonBackend::LoadBackendLibrary()
{
  TritonBackendInitFn_t bifn;
  TritonBackendFiniFn_t bffn;
  TritonBackendAttriFn_t bafn;
  TritonModelInitFn_t mifn;
  TritonModelFiniFn_t mffn;
  TritonModelInstanceInitFn_t iifn;
  TritonModelInstanceFiniFn_t iffn;
  TritonModelInstanceExecFn_t iefn;

  {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));

    RETURN_IF_ERROR(slib->OpenLibraryHandle(libpath_, &dlhandle_));

    // Backend initialize and finalize functions, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_Initialize", true /* optional */,
        reinterpret_cast<void**>(&bifn)));
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_Finalize", true /* optional */,
        reinterpret_cast<void**>(&bffn)));

    // Backend attribute function, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_GetBackendAttribute", true /* optional */,
        reinterpret_cast<void**>(&bafn)));

    // Model initialize and finalize functions, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInitialize", true /* optional */,
        reinterpret_cast<void**>(&mifn)));
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelFinalize", true /* optional */,
        reinterpret_cast<void**>(&mffn)));

    // Model instance initialize and finalize functions, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInstanceInitialize", true /* optional */,
        reinterpret_cast<void**>(&iifn)));
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInstanceFinalize", true /* optional */,
        reinterpret_cast<void**>(&iffn)));

    // Model instance execute function, required
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInstanceExecute", false /* optional */,
        reinterpret_cast<void**>(&iefn)));
  }

  backend_init_fn_ = bifn;
  backend_fini_fn_ = bffn;
  backend_attri_fn_ = bafn;
  model_init_fn_ = mifn;
  model_fini_fn_ = mffn;
  inst_init_fn_ = iifn;
  inst_fini_fn_ = iffn;
  inst_exec_fn_ = iefn;

  return Status::Success;
}

extern "C" {

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ApiVersion(uint32_t* major, uint32_t* minor)
{
  *major = TRITONBACKEND_API_VERSION_MAJOR;
  *minor = TRITONBACKEND_API_VERSION_MINOR;
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendName(TRITONBACKEND_Backend* backend, const char** name)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *name = tb->Name().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendConfig(
    TRITONBACKEND_Backend* backend, TRITONSERVER_Message** backend_config)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *backend_config = const_cast<TRITONSERVER_Message*>(
      reinterpret_cast<const TRITONSERVER_Message*>(&tb->BackendConfig()));
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy* policy)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *policy = tb->ExecutionPolicy();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendSetExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy policy)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  tb->SetExecutionPolicy(policy);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendArtifacts(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *artifact_type = TRITONBACKEND_ARTIFACT_FILESYSTEM;
  *location = tb->Directory().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendMemoryManager(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_MemoryManager** manager)
{
  static TritonMemoryManager gMemoryManager;
  *manager = reinterpret_cast<TRITONBACKEND_MemoryManager*>(&gMemoryManager);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendState(TRITONBACKEND_Backend* backend, void** state)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *state = tb->State();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendSetState(TRITONBACKEND_Backend* backend, void* state)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  tb->SetState(state);
  return nullptr;  // success
}

}  // extern C

//
// TritonBackendManager
//

static std::weak_ptr<TritonBackendManager> backend_manager_;
static std::mutex mu_;

Status
TritonBackendManager::Create(std::shared_ptr<TritonBackendManager>* manager)
{
  std::lock_guard<std::mutex> lock(mu_);

  // If there is already a manager then we just use it...
  *manager = backend_manager_.lock();
  if (*manager != nullptr) {
    return Status::Success;
  }

  manager->reset(new TritonBackendManager());
  backend_manager_ = *manager;

  return Status::Success;
}

Status
TritonBackendManager::CreateBackend(
    const std::string& name, const std::string& dir,
    const std::string& libpath,
    const triton::common::BackendCmdlineConfig& backend_cmdline_config,
    std::shared_ptr<TritonBackend>* backend)
{
  std::lock_guard<std::mutex> lock(mu_);

  const auto& itr = backend_map_.find(libpath);
  if (itr != backend_map_.end()) {
    *backend = itr->second;
    return Status::Success;
  }

  RETURN_IF_ERROR(TritonBackend::Create(
      name, dir, libpath, backend_cmdline_config, backend));
  backend_map_.insert({libpath, *backend});

  return Status::Success;
}

Status
TritonBackendManager::BackendState(
    std::unique_ptr<std::unordered_map<std::string, std::vector<std::string>>>*
        backend_state)
{
  std::lock_guard<std::mutex> lock(mu_);

  std::unique_ptr<std::unordered_map<std::string, std::vector<std::string>>>
      backend_state_map(
          new std::unordered_map<std::string, std::vector<std::string>>);
  for (const auto& backend_pair : backend_map_) {
    auto& libpath = backend_pair.first;
    auto backend = backend_pair.second;

    const char* backend_config;
    size_t backend_config_size;
    backend->BackendConfig().Serialize(&backend_config, &backend_config_size);
    backend_state_map->insert(
        {backend->Name(), std::vector<std::string>{libpath, backend_config}});
  }

  *backend_state = std::move(backend_state_map);

  return Status::Success;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_manager.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include "constants.h"
#include "server_message.h"
#include "status.h"
#include "triton/common/model_config.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

//
// Proxy to a backend shared library.
//
class TritonBackend {
 public:
  struct Attribute {
    Attribute() : exec_policy_(TRITONBACKEND_EXECUTION_BLOCKING) {}
    TRITONBACKEND_ExecutionPolicy exec_policy_;
    std::vector<inference::ModelInstanceGroup> preferred_groups_;
  };

  typedef TRITONSERVER_Error* (*TritonModelInitFn_t)(
      TRITONBACKEND_Model* model);
  typedef TRITONSERVER_Error* (*TritonModelFiniFn_t)(
      TRITONBACKEND_Model* model);
  typedef TRITONSERVER_Error* (*TritonModelInstanceInitFn_t)(
      TRITONBACKEND_ModelInstance* instance);
  typedef TRITONSERVER_Error* (*TritonModelInstanceFiniFn_t)(
      TRITONBACKEND_ModelInstance* instance);
  typedef TRITONSERVER_Error* (*TritonModelInstanceExecFn_t)(
      TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
      const uint32_t request_cnt);

  static Status Create(
      const std::string& name, const std::string& dir,
      const std::string& libpath,
      const triton::common::BackendCmdlineConfig& backend_cmdline_config,
      std::shared_ptr<TritonBackend>* backend);
  ~TritonBackend();

  const std::string& Name() const { return name_; }
  const std::string& Directory() const { return dir_; }
  const TritonServerMessage& BackendConfig() const { return backend_config_; }
  const Attribute& BackendAttributes() const { return attributes_; }

  TRITONBACKEND_ExecutionPolicy ExecutionPolicy() const
  {
    return attributes_.exec_policy_;
  }
  void SetExecutionPolicy(const TRITONBACKEND_ExecutionPolicy policy)
  {
    attributes_.exec_policy_ = policy;
  }

  void* State() { return state_; }
  void SetState(void* state) { state_ = state; }

  TritonModelInitFn_t ModelInitFn() const { return model_init_fn_; }
  TritonModelFiniFn_t ModelFiniFn() const { return model_fini_fn_; }
  TritonModelInstanceInitFn_t ModelInstanceInitFn() const
  {
    return inst_init_fn_;
  }
  TritonModelInstanceFiniFn_t ModelInstanceFiniFn() const
  {
    return inst_fini_fn_;
  }
  TritonModelInstanceExecFn_t ModelInstanceExecFn() const
  {
    return inst_exec_fn_;
  }

 private:
  typedef TRITONSERVER_Error* (*TritonBackendInitFn_t)(
      TRITONBACKEND_Backend* backend);
  typedef TRITONSERVER_Error* (*TritonBackendFiniFn_t)(
      TRITONBACKEND_Backend* backend);
  typedef TRITONSERVER_Error* (*TritonBackendAttriFn_t)(
      TRITONBACKEND_Backend* backend,
      TRITONBACKEND_BackendAttribute* backend_attributes);

  TritonBackend(
      const std::string& name, const std::string& dir,
      const std::string& libpath, const TritonServerMessage& backend_config);

  void ClearHandles();
  Status LoadBackendLibrary();
  Status UpdateAttributes();

  // The name of the backend.
  const std::string name_;

  // Full path to the directory holding backend shared library and
  // other artifacts.
  const std::string dir_;

  // Full path to the backend shared library.
  const std::string libpath_;

  // Backend configuration as JSON
  TritonServerMessage backend_config_;

  // backend attributes
  Attribute attributes_;

  // dlopen / dlsym handles
  void* dlhandle_;
  TritonBackendInitFn_t backend_init_fn_;
  TritonBackendFiniFn_t backend_fini_fn_;
  TritonBackendAttriFn_t backend_attri_fn_;
  TritonModelInitFn_t model_init_fn_;
  TritonModelFiniFn_t model_fini_fn_;
  TritonModelInstanceInitFn_t inst_init_fn_;
  TritonModelInstanceFiniFn_t inst_fini_fn_;
  TritonModelInstanceExecFn_t inst_exec_fn_;

  // Opaque state associated with the backend.
  void* state_;
};

//
// Manage communication with Triton backends and their lifecycle.
//
class TritonBackendManager {
 public:
  static Status Create(std::shared_ptr<TritonBackendManager>* manager);

  Status CreateBackend(
      const std::string& name, const std::string& dir,
      const std::string& libpath,
      const triton::common::BackendCmdlineConfig& backend_cmdline_config,
      std::shared_ptr<TritonBackend>* backend);

  Status BackendState(
      std::unique_ptr<
          std::unordered_map<std::string, std::vector<std::string>>>*
          backend_state);

 private:
  DISALLOW_COPY_AND_ASSIGN(TritonBackendManager);
  TritonBackendManager() = default;

  std::unordered_map<std::string, std::shared_ptr<TritonBackend>> backend_map_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_memory_manager.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_memory_manager.h"
#include "pinned_memory_manager.h"
#include "status.h"
#include "tritonserver_apis.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#include "cuda_memory_manager.h"
#endif // TRITON_ENABLE_GPU
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

extern "C" {

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_MemoryManagerAllocate(
    TRITONBACKEND_MemoryManager* manager, void** buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id,
    const uint64_t byte_size)
{
  switch (memory_type) {
    case TRITONSERVER_MEMORY_GPU:
#ifdef TRITON_ENABLE_GPU
    {
      auto status = CudaMemoryManager::Alloc(buffer, byte_size, memory_type_id);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.ErrorCode()), status.Message().c_str());
      }
      break;
    }
#else
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED, "GPU memory allocation not supported");
#endif  // TRITON_ENABLE_GPU

    case TRITONSERVER_MEMORY_CPU_PINNED:
#ifdef TRITON_ENABLE_GPU
    {
      TRITONSERVER_MemoryType mt = memory_type;
      auto status = PinnedMemoryManager::Alloc(buffer, byte_size, &mt, false);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.ErrorCode()), status.Message().c_str());
      }
      break;
    }
#else
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Pinned memory allocation not supported");
#endif  // TRITON_ENABLE_GPU

    case TRITONSERVER_MEMORY_CPU: {
      *buffer = malloc(byte_size);
      if (*buffer == nullptr) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNAVAILABLE, "CPU memory allocation failed");
      }
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_MemoryManagerFree(
    TRITONBACKEND_MemoryManager* manager, void* buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id)
{
  switch (memory_type) {
    case TRITONSERVER_MEMORY_GPU: {
#ifdef TRITON_ENABLE_GPU
      auto status = CudaMemoryManager::Free(buffer, memory_type_id);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
      }
#endif  // TRITON_ENABLE_GPU
      break;
    }

    case TRITONSERVER_MEMORY_CPU_PINNED: {
#ifdef TRITON_ENABLE_GPU
      auto status = PinnedMemoryManager::Free(buffer);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
      }
#endif  // TRITON_ENABLE_GPU
      break;
    }

    case TRITONSERVER_MEMORY_CPU:
      free(buffer);
      break;
  }

  return nullptr;  // success
}

}  // extern C

}}  // namespace triton::core
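For context, a backend reaches these two functions through the opaque TRITONBACKEND_MemoryManager handle it gets from the core. A minimal sketch, assuming only the backend API in tritonbackend.h (TRITONBACKEND_BackendMemoryManager is declared there); no particular backend is implied.

#include <cstdint>
#include "triton/core/tritonbackend.h"

// Allocate a plain CPU scratch buffer through the core memory manager.
TRITONSERVER_Error*
AllocateScratchBuffer(
    TRITONBACKEND_Backend* backend, uint64_t byte_size, void** scratch)
{
  TRITONBACKEND_MemoryManager* manager = nullptr;
  TRITONSERVER_Error* err = TRITONBACKEND_BackendMemoryManager(backend, &manager);
  if (err != nullptr) {
    return err;
  }

  // memory_type_id 0 is the conventional id for CPU allocations.
  return TRITONBACKEND_MemoryManagerAllocate(
      manager, scratch, TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */,
      byte_size);
}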
3rdparty/core-r22.12/src/backend_memory_manager.h deleted 100644 → 0
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
namespace triton { namespace core {

// Currently there is just a global memory manager that is used for
// all backends and which simply forwards requests on to the core
// memory manager.
struct TritonMemoryManager {
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_model.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_model.h"
#include <vector>
#include "backend_config.h"
#include "backend_model_instance.h"
#include "dynamic_batch_scheduler.h"
#include "filesystem.h"
#include "model_config_utils.h"
#include "numa_utils.h"
#include "sequence_batch_scheduler.h"
#include "sequence_state.h"
#include "server.h"
#include "server_message.h"
#include "shared_library.h"
#include "triton/common/logging.h"
#include "tritonserver_apis.h"
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

Status
TritonModel::Create(
    InferenceServer* server, const std::string& model_path,
    const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
    const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
    const std::string& model_name, const int64_t version,
    inference::ModelConfig model_config, const bool is_config_provided,
    std::unique_ptr<TritonModel>* model)
{
  model->reset();

  // The model configuration must specify a backend. The name of the
  // corresponding shared library must be libtriton_<backend>.so.
  if (model_config.backend().empty()) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify 'backend' for '" + model_config.name() + "'");
  }

  // Localize the content of the model repository corresponding to
  // 'model_name'. This model holds a handle to the localized content
  // so that it persists as long as the model is loaded.
  std::shared_ptr<LocalizedPath> localized_model_dir;
  RETURN_IF_ERROR(LocalizePath(model_path, &localized_model_dir));

  // Localize paths in backend model config
  // [FIXME] Remove once a more permanent solution is implemented (DLIS-4211)
  RETURN_IF_ERROR(LocalizePythonBackendExecutionEnvironmentPath(
      model_path, &model_config, &localized_model_dir));

  // Get some internal configuration values needed for initialization.
  std::string backend_dir;
  RETURN_IF_ERROR(BackendConfigurationGlobalBackendsDirectory(
      backend_cmdline_config_map, &backend_dir));

  bool auto_complete_config = false;
  RETURN_IF_ERROR(BackendConfigurationAutoCompleteConfig(
      backend_cmdline_config_map, &auto_complete_config));

  double min_compute_capability = 0;
  RETURN_IF_ERROR(BackendConfigurationMinComputeCapability(
      backend_cmdline_config_map, &min_compute_capability));

  std::string specialized_backend_name;
  RETURN_IF_ERROR(BackendConfigurationSpecializeBackendName(
      backend_cmdline_config_map, model_config.backend(),
      &specialized_backend_name));

  std::string backend_libname;
  RETURN_IF_ERROR(BackendConfigurationBackendLibraryName(
      specialized_backend_name, &backend_libname));

  // Get the path to the backend shared library. Search path is
  // version directory, model directory, global backend directory.
  const auto localized_model_path = localized_model_dir->Path();
  const auto version_path =
      JoinPath({localized_model_path, std::to_string(version)});
  const std::string global_path =
      JoinPath({backend_dir, specialized_backend_name});
  const std::vector<std::string> search_paths = {
      version_path, localized_model_path, global_path};

  std::string backend_libdir;
  std::string backend_libpath;
  for (const auto& path : search_paths) {
    const auto full_path = JoinPath({path, backend_libname});
    bool exists = false;
    RETURN_IF_ERROR(FileExists(full_path, &exists));
    if (exists) {
      backend_libdir = path;
      backend_libpath = full_path;
      break;
    }
  }

  if (backend_libpath.empty()) {
    return Status(
        Status::Code::INVALID_ARG,
        "unable to find '" + backend_libname + "' for model '" +
            model_config.name() + "', searched: " + version_path + ", " +
            model_path + ", " + global_path);
  }

  // Resolve the global backend configuration with the specific backend
  // configuration
  triton::common::BackendCmdlineConfig config;
  RETURN_IF_ERROR(ResolveBackendConfigs(
      backend_cmdline_config_map, model_config.backend(), config));

  RETURN_IF_ERROR(SetBackendConfigDefaults(config));

  std::shared_ptr<TritonBackend> backend;
  RETURN_IF_ERROR(server->BackendManager()->CreateBackend(
      model_config.backend(), backend_libdir, backend_libpath, config,
      &backend));

  // Normalize backend-dependent config
  {
    const auto& attributes = backend->BackendAttributes();
    // [WIP] formalize config normalization / validation
    RETURN_IF_ERROR(NormalizeInstanceGroup(
        min_compute_capability, attributes.preferred_groups_, &model_config));
    RETURN_IF_ERROR(
        ValidateInstanceGroup(model_config, min_compute_capability));
  }

  // Create and initialize the model.
  std::unique_ptr<TritonModel> local_model(new TritonModel(
      server, localized_model_dir, backend, min_compute_capability, version,
      model_config, auto_complete_config));

  TritonModel* raw_local_model = local_model.get();

  // Model initialization is optional... The TRITONBACKEND_Model
  // object is this TritonModel object. We must set the shared library
  // path to point to the backend directory in case the backend
  // library attempts to load additional shared libraries.
  if (backend->ModelInitFn() != nullptr) {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));
    RETURN_IF_ERROR(slib->SetLibraryDirectory(backend->Directory()));

    TRITONSERVER_Error* err = backend->ModelInitFn()(
        reinterpret_cast<TRITONBACKEND_Model*>(raw_local_model));

    RETURN_IF_ERROR(slib->ResetLibraryDirectory());
    RETURN_IF_TRITONSERVER_ERROR(err);
  }

  // Initialize the model for Triton core usage
  RETURN_IF_ERROR(local_model->Init(is_config_provided));

  bool device_blocking = false;
  if (local_model->backend_->ExecutionPolicy() ==
      TRITONBACKEND_EXECUTION_DEVICE_BLOCKING) {
    if (model_config.has_sequence_batching()) {
      LOG_INFO << "Overriding execution policy to "
                  "\"TRITONBACKEND_EXECUTION_BLOCKING\" for sequence model \""
               << model_config.name() << "\"";
    } else {
      device_blocking = true;
    }
  }

  // Create and initialize the model instances for this model.
  RETURN_IF_ERROR(TritonModelInstance::CreateInstances(
      raw_local_model, backend_cmdline_config_map, host_policy_map,
      model_config, device_blocking));

  RETURN_IF_ERROR(local_model->SetConfiguredScheduler());

  *model = std::move(local_model);
  return Status::Success;
}

Status
TritonModel::ResolveBackendConfigs(
    const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
    const std::string& backend_name,
    triton::common::BackendCmdlineConfig& config)
{
  const auto& global_itr = backend_cmdline_config_map.find(std::string());
  const auto& specific_itr = backend_cmdline_config_map.find(backend_name);
  if (specific_itr == backend_cmdline_config_map.end() &&
      global_itr != backend_cmdline_config_map.end()) {
    for (auto setting : global_itr->second) {
      config.push_back(setting);
    }
  } else if (
      specific_itr != backend_cmdline_config_map.end() &&
      global_itr == backend_cmdline_config_map.end()) {
    for (auto setting : specific_itr->second) {
      config.push_back(setting);
    }
  } else if (
      specific_itr != backend_cmdline_config_map.end() &&
      global_itr != backend_cmdline_config_map.end()) {
    triton::common::BackendCmdlineConfig global_backend_config =
        global_itr->second;
    triton::common::BackendCmdlineConfig specific_backend_config =
        specific_itr->second;

    std::sort(global_backend_config.begin(), global_backend_config.end());
    std::sort(specific_backend_config.begin(), specific_backend_config.end());

    size_t global_index = 0;
    size_t specific_index = 0;
    while (global_index < global_backend_config.size() &&
           specific_index < specific_backend_config.size()) {
      auto& current_global_setting = global_backend_config.at(global_index);
      auto& current_specific_setting =
          specific_backend_config.at(specific_index);
      if (current_specific_setting.first.compare(
              current_global_setting.first) == 0) {
        // specific setting overrides global setting
        config.push_back(current_specific_setting);
        ++global_index;
        ++specific_index;
      } else if (
          current_specific_setting.first.compare(
              current_global_setting.first) < 0) {
        config.push_back(current_specific_setting);
        ++specific_index;
      } else {
        config.push_back(current_global_setting);
        ++global_index;
      }
    }

    // add the rest of the global configs
    if (global_index < global_backend_config.size()) {
      auto& current_global_setting = global_backend_config.at(global_index);
      config.push_back(current_global_setting);
    }

    // add the rest of the specific settings
    if (specific_index < specific_backend_config.size()) {
      auto& current_specific_setting =
          specific_backend_config.at(specific_index);
      config.push_back(current_specific_setting);
    }
  }  // else empty config

  return Status::Success;
}
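The merge above keeps a backend-specific command-line setting whenever the same key also appears in the global configuration, and otherwise keeps settings from both lists. A standalone sketch of that override rule, using plain string pairs instead of the private TritonModel helper (the key and value names are hypothetical):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

using CmdlineConfig = std::vector<std::pair<std::string, std::string>>;

int main()
{
  CmdlineConfig global = {
      {"default-max-batch-size", "8"}, {"shm-region-prefix", "triton"}};
  CmdlineConfig specific = {{"default-max-batch-size", "16"}};

  // Specific settings win when a key appears in both lists.
  CmdlineConfig merged = specific;
  for (const auto& g : global) {
    bool overridden = false;
    for (const auto& s : specific) {
      if (s.first == g.first) {
        overridden = true;
        break;
      }
    }
    if (!overridden) {
      merged.push_back(g);
    }
  }

  for (const auto& kv : merged) {
    std::cout << kv.first << "=" << kv.second << "\n";
  }
  return 0;  // prints default-max-batch-size=16 and shm-region-prefix=triton
}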
const std::unordered_map<std::string, std::string> backend_config_defaults(
    {{"default-max-batch-size", "4"}});

Status
TritonModel::SetBackendConfigDefaults(
    triton::common::BackendCmdlineConfig& config)
{
  auto backend_config_defaults_copy = backend_config_defaults;

  for (auto& setting : config) {
    if (setting.first.compare("default-max-batch-size") == 0) {
      LOG_VERBOSE(1) << "Found overwritten default setting: " << setting.first
                     << "," << setting.second;
      backend_config_defaults_copy.erase(setting.first);
    }

    if (backend_config_defaults_copy.empty()) {
      break;
    }
  }

  // Anything left should be added to the config
  for (const auto& default_setting : backend_config_defaults_copy) {
    LOG_VERBOSE(1) << "Adding default backend config setting: "
                   << default_setting.first << "," << default_setting.second;
    config.push_back(
        std::make_pair(default_setting.first, default_setting.second));
  }

  return Status::Success;
}
Status
TritonModel::AddInstance(
    std::unique_ptr<TritonModelInstance>&& instance, const bool passive)
{
  if (passive) {
    passive_instances_.emplace_back(std::move(instance));
  } else {
    instances_.emplace_back(std::move(instance));
  }
  return Status::Success;
}
Status
TritonModel::UpdateModelConfig(
    const uint32_t config_version, TRITONSERVER_Message* updated_config_message)
{
  const char* buffer;
  size_t byte_size;
  RETURN_IF_TRITONSERVER_ERROR(TRITONSERVER_MessageSerializeToJson(
      updated_config_message, &buffer, &byte_size));
  inference::ModelConfig updated_config;
  RETURN_IF_ERROR(
      JsonToModelConfig({buffer, byte_size}, config_version, &updated_config));
  auto config = Config();
  config.set_max_batch_size(updated_config.max_batch_size());

  auto inputs_config = config.mutable_input();
  *inputs_config = updated_config.input();
  auto outputs_config = config.mutable_output();
  *outputs_config = updated_config.output();

  if (!config.scheduling_choice_case()) {
    if (updated_config.has_dynamic_batching()) {
      auto dynamic_batching_config = config.mutable_dynamic_batching();
      *dynamic_batching_config = updated_config.dynamic_batching();
    } else if (updated_config.has_sequence_batching()) {
      auto sequence_batching_config = config.mutable_sequence_batching();
      *sequence_batching_config = updated_config.sequence_batching();
    } else if (updated_config.has_ensemble_scheduling()) {
      auto ensemble_scheduling_config = config.mutable_ensemble_scheduling();
      *ensemble_scheduling_config = updated_config.ensemble_scheduling();
    }  // else do nothing
  } else if (
      config.scheduling_choice_case() !=
      updated_config.scheduling_choice_case()) {
    return Status(
        triton::common::Error::Code::INTERNAL,
        (std::string("Cannot update scheduling choice from ") +
         std::to_string(config.scheduling_choice_case()) +
         std::string(" to ") +
         std::to_string(config.scheduling_choice_case()) +
         std::string(" when auto-completing."))
            .c_str());
  }  // else do nothing

  // Need to normalize the model configuration for
  // populating missing fields.
  RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability_, &config));
  RETURN_IF_ERROR(SetModelConfig(config));

  return Status::Success;
}
Status
TritonModel::SetConfiguredScheduler()
{
  std::unique_ptr<Scheduler> scheduler;

  // Need to enforce equal shape batches (i.e. non-ragged batches) if
  // the model 1) allows one or more variable-size input tensors that
  // are not marked as 'allow_ragged_batch' or 2) has one or more
  // shape-tensor inputs. This is not needed if all input shapes are
  // non-variable and if there are no shape tensors... so we don't
  // enable it in that case for efficiency reasons.
  std::unordered_map<std::string, bool> enforce_equal_shape_tensors;
  for (const auto input : config_.input()) {
    if (input.is_shape_tensor()) {
      enforce_equal_shape_tensors.insert({input.name(), true});
    } else if (
        !input.allow_ragged_batch() &&
        (triton::common::GetElementCount(input) == -1)) {
      enforce_equal_shape_tensors.insert({input.name(), false});
    }
  }

  // If 'sequence_batching' is configured, then use the SequenceBatchScheduler,
  // otherwise use the default DynamicBatchScheduler.
  if (config_.has_sequence_batching()) {
    // Sequence batcher
    RETURN_IF_ERROR(SequenceBatchScheduler::Create(
        this, enforce_equal_shape_tensors, &scheduler));
  } else if (config_.has_dynamic_batching()) {
    // Dynamic batcher
    RETURN_IF_ERROR(DynamicBatchScheduler::Create(
        this, nullptr, 0 /*nice*/, true /* dynamic_batching_enabled */,
        config_.max_batch_size(), enforce_equal_shape_tensors,
        config_.dynamic_batching(),
        config_.response_cache().enable() /* response_cache_enable */,
        &scheduler));
  } else {
    // Default scheduler. Use dynamic batch scheduler (with batching
    // disabled) as the default scheduler.
    RETURN_IF_ERROR(DynamicBatchScheduler::Create(
        this, nullptr, 0 /*nice*/, false /* dynamic_batching_enabled */,
        1 /* max_batch_size */,
        std::unordered_map<std::string, bool>() /* enforce_equal_shape_tensors */,
        false /* preserve_ordering */,
        config_.response_cache().enable() /* response_cache_enable */,
        std::set<int32_t>() /* preferred_batch_sizes */,
        0 /* max_queue_delay_microseconds */, &scheduler));
  }

  return SetScheduler(std::move(scheduler));
}
Status
TritonModel::Initialize()
{
  for (const auto& instance : instances_) {
    RETURN_IF_ERROR(instance->Initialize());
  }

  return Status::Success;
}

Status
TritonModel::WarmUp()
{
  for (const auto& instance : instances_) {
    RETURN_IF_ERROR(instance->WarmUp());
  }

  return Status::Success;
}
TritonModel::TritonModel(
    InferenceServer* server,
    const std::shared_ptr<LocalizedPath>& localized_model_dir,
    const std::shared_ptr<TritonBackend>& backend,
    const double min_compute_capability, const int64_t version,
    const inference::ModelConfig& config, const bool auto_complete_config)
    : Model(
          min_compute_capability, localized_model_dir->Path(), version, config),
      server_(server), min_compute_capability_(min_compute_capability),
      auto_complete_config_(auto_complete_config),
      localized_model_dir_(localized_model_dir), backend_(backend),
      state_(nullptr)
{
}

TritonModel::~TritonModel()
{
  // Explicitly delete/finalize all model instances before finalizing
  // the model itself.
  instances_.clear();
  passive_instances_.clear();

  // Unregister itself from the rate limiter. Note this should happen
  // after all instances are destructed. Destructing instances ensures
  // there are no instance threads waiting on rate limiter for
  // receiving their payloads.
  server_->GetRateLimiter()->UnregisterModel(this);

  // Model finalization is optional... The TRITONBACKEND_Model
  // object is this TritonModel object.
  if (backend_->ModelFiniFn() != nullptr) {
    LOG_TRITONSERVER_ERROR(
        backend_->ModelFiniFn()(reinterpret_cast<TRITONBACKEND_Model*>(this)),
        "failed finalizing model");
  }
}
extern "C" {

//
// TRITONBACKEND_Model
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelName(TRITONBACKEND_Model* model, const char** name)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *name = tm->Name().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelVersion(TRITONBACKEND_Model* model, uint64_t* version)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *version = tm->Version();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelRepository(
    TRITONBACKEND_Model* model, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *artifact_type = TRITONBACKEND_ARTIFACT_FILESYSTEM;
  *location = tm->LocalizedModelPath().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message** model_config)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);

  std::string model_config_json;
  Status status =
      ModelConfigToJson(tm->Config(), config_version, &model_config_json);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *model_config = reinterpret_cast<TRITONSERVER_Message*>(
      new TritonServerMessage(std::move(model_config_json)));
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelAutoCompleteConfig(
    TRITONBACKEND_Model* model, bool* auto_complete_config)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *auto_complete_config = tm->AutoCompleteConfig();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelSetConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message* model_config)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  Status status = tm->UpdateModelConfig(config_version, model_config);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelServer(
    TRITONBACKEND_Model* model, TRITONSERVER_Server** server)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *server = reinterpret_cast<TRITONSERVER_Server*>(tm->Server());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelBackend(
    TRITONBACKEND_Model* model, TRITONBACKEND_Backend** backend)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *backend = reinterpret_cast<TRITONBACKEND_Backend*>(tm->Backend().get());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelState(TRITONBACKEND_Model* model, void** state)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *state = tm->State();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelSetState(TRITONBACKEND_Model* model, void* state)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  tm->SetState(state);
  return nullptr;  // success
}
///
/// TRITONBACKEND_Request
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestId(TRITONBACKEND_Request* request, const char** id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *id = tr->Id().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestCorrelationId(TRITONBACKEND_Request* request, uint64_t* id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const InferenceRequest::SequenceId& correlation_id = tr->CorrelationId();
  if (correlation_id.Type() != InferenceRequest::SequenceId::DataType::UINT64) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "correlation ID in request is not an unsigned int")
            .c_str());
  }
  *id = correlation_id.UnsignedIntValue();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestFlags(TRITONBACKEND_Request* request, uint32_t* flags)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *flags = tr->Flags();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestCorrelationIdString(
    TRITONBACKEND_Request* request, const char** id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const InferenceRequest::SequenceId& correlation_id = tr->CorrelationId();
  if (correlation_id.Type() != InferenceRequest::SequenceId::DataType::STRING) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "correlation ID in request is not a string")
            .c_str());
  }
  *id = correlation_id.StringValue().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInputCount(TRITONBACKEND_Request* request, uint32_t* count)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *count = tr->ImmutableInputs().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** input_name)
{
  *input_name = nullptr;

  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& inputs = tr->ImmutableInputs();
  if (index >= inputs.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "out of bounds index " + std::to_string(index) +
         ": request has " + std::to_string(inputs.size()) + " inputs")
            .c_str());
  }

  // The request inputs are not allowed to change once the request
  // makes it to the backend, so it is ok to just iterate through the
  // map. This linear search is the best we can do given the need for
  // the inputs to be in a map and given the typical small number of
  // inputs is better than having every request maintain the inputs as
  // both map and vector.
  uint32_t cnt = 0;
  for (const auto& pr : inputs) {
    if (cnt++ == index) {
      InferenceRequest::Input* in = pr.second;
      *input_name = in->Name().c_str();
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInput(
    TRITONBACKEND_Request* request, const char* name,
    TRITONBACKEND_Input** input)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& inputs = tr->ImmutableInputs();
  const auto& itr = inputs.find(name);
  if (itr == inputs.end()) {
    *input = nullptr;
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "unknown request input name " + name).c_str());
  }

  InferenceRequest::Input* in = itr->second;
  *input = reinterpret_cast<TRITONBACKEND_Input*>(in);

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInputByIndex(
    TRITONBACKEND_Request* request, const uint32_t index,
    TRITONBACKEND_Input** input)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& inputs = tr->ImmutableInputs();
  if (index >= inputs.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "out of bounds index " + std::to_string(index) +
         ": request has " + std::to_string(inputs.size()) + " inputs")
            .c_str());
  }

  // The request inputs are not allowed to change once the request
  // makes it to the backend, so it is ok to just iterate through the
  // map. This linear search is the best we can do given the need for
  // the inputs to be in a map and given the typical small number of
  // inputs is better than having every request maintain the inputs as
  // both map and vector.
  uint32_t cnt = 0;
  for (const auto& pr : inputs) {
    if (cnt++ == index) {
      InferenceRequest::Input* in = pr.second;
      *input = reinterpret_cast<TRITONBACKEND_Input*>(in);
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputCount(
    TRITONBACKEND_Request* request, uint32_t* count)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *count = tr->ImmutableRequestedOutputs().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** output_name)
{
  *output_name = nullptr;

  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& routputs = tr->ImmutableRequestedOutputs();
  if (index >= routputs.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "out of bounds index " + std::to_string(index) +
         ": request has " + std::to_string(routputs.size()) +
         " requested outputs")
            .c_str());
  }

  // The requested outputs are not allowed to change once the request
  // makes it to the backend, so it is ok to just iterate through the
  // set. This linear search is the best we can do given the requested
  // outputs being in a set and given the typical small number of
  // requested outputs it should not be a performance issue.
  uint32_t cnt = 0;
  for (const auto& rout : routputs) {
    if (cnt++ == index) {
      *output_name = rout.c_str();
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputBufferProperties(
    TRITONBACKEND_Request* request, const char* name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  auto status =
      tr->OutputBufferProperties(name, byte_size, memory_type, memory_type_id);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestRelease(
    TRITONBACKEND_Request* request, uint32_t release_flags)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  std::unique_ptr<InferenceRequest> ur(tr);
  InferenceRequest::Release(std::move(ur), release_flags);
  return nullptr;  // success
}
///
/// TRITONBACKEND_State
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateUpdate(TRITONBACKEND_State* state)
{
  SequenceState* ts = reinterpret_cast<SequenceState*>(state);
  auto status = ts->Update();
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateNew(
    TRITONBACKEND_State** state, TRITONBACKEND_Request* request,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  SequenceState* lstate;
  std::vector<int64_t> lshape(shape, shape + dims_count);
  auto& sequence_state = tr->GetSequenceStates();

  if (sequence_state == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("unable to add state '") + name +
         "'. State configuration is missing for model '" + tr->ModelName() +
         "'.")
            .c_str());
  }

  Status status = sequence_state->OutputState(
      name, TritonToDataType(datatype), lshape, &lstate);
  if (!status.IsOk()) {
    *state = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *state = reinterpret_cast<TRITONBACKEND_State*>(lstate);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateBuffer(
    TRITONBACKEND_State* state, void** buffer, const uint64_t buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  SequenceState* to = reinterpret_cast<SequenceState*>(state);
  Status status = Status::Success;

  // If the buffer size exactly matches the buffer available, reuse the
  // currently allocated buffer.
  if (to->Data()->TotalByteSize() == buffer_byte_size) {
    const std::shared_ptr<AllocatedMemory>& memory =
        reinterpret_cast<const std::shared_ptr<AllocatedMemory>&>(to->Data());
    TRITONSERVER_MemoryType current_memory_type;
    int64_t current_memory_type_id;
    void* lbuffer =
        memory->MutableBuffer(&current_memory_type, &current_memory_type_id);

    // If the requested memory type doesn't match the current buffer, allocate a
    // new buffer with the requested memory type and memory type id.
    if (current_memory_type == *memory_type &&
        current_memory_type_id == *memory_type_id) {
      *buffer = lbuffer;
    } else {
      std::shared_ptr<AllocatedMemory> memory = std::make_shared<AllocatedMemory>(
          buffer_byte_size, *memory_type, *memory_type_id);
      *buffer = memory->MutableBuffer(memory_type, memory_type_id);
      to->RemoveAllData();
      status = to->SetData(memory);
    }
  } else {
    std::shared_ptr<AllocatedMemory> memory = std::make_shared<AllocatedMemory>(
        buffer_byte_size, *memory_type, *memory_type_id);
    *buffer = memory->MutableBuffer(memory_type, memory_type_id);
    to->RemoveAllData();
    status = to->SetData(memory);
  }

  if (!status.IsOk()) {
    *buffer = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateBufferAttributes(
    TRITONBACKEND_State* state,
    TRITONSERVER_BufferAttributes** buffer_attributes)
{
  SequenceState* to = reinterpret_cast<SequenceState*>(state);
  to->Data()->BufferAt(
      0, reinterpret_cast<BufferAttributes**>(buffer_attributes));
  return nullptr;  // success
}

//
// TRITONBACKEND_ResponseFactory
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactoryNew(
    TRITONBACKEND_ResponseFactory** factory, TRITONBACKEND_Request* request)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      new std::shared_ptr<InferenceResponseFactory>(tr->ResponseFactory());
  *factory = reinterpret_cast<TRITONBACKEND_ResponseFactory*>(response_factory);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactoryDelete(TRITONBACKEND_ResponseFactory* factory)
{
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      reinterpret_cast<std::shared_ptr<InferenceResponseFactory>*>(factory);
  delete response_factory;
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactorySendFlags(
    TRITONBACKEND_ResponseFactory* factory, const uint32_t send_flags)
{
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      reinterpret_cast<std::shared_ptr<InferenceResponseFactory>*>(factory);
  Status status = (*response_factory)->SendFlags(send_flags);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}
///
/// TRITONBACKEND_Response
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseNew(
    TRITONBACKEND_Response** response, TRITONBACKEND_Request* request)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);

  std::unique_ptr<InferenceResponse> tresp;
  Status status = tr->ResponseFactory()->CreateResponse(&tresp);
  if (!status.IsOk()) {
    *response = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *response = reinterpret_cast<TRITONBACKEND_Response*>(tresp.release());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseNewFromFactory(
    TRITONBACKEND_Response** response, TRITONBACKEND_ResponseFactory* factory)
{
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      reinterpret_cast<std::shared_ptr<InferenceResponseFactory>*>(factory);

  std::unique_ptr<InferenceResponse> tr;
  Status status = (*response_factory)->CreateResponse(&tr);
  if (!status.IsOk()) {
    *response = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *response = reinterpret_cast<TRITONBACKEND_Response*>(tr.release());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseDelete(TRITONBACKEND_Response* response)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  delete tr;
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetStringParameter(
    TRITONBACKEND_Response* response, const char* name, const char* value)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  Status status = tr->AddParameter(name, value);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetIntParameter(
    TRITONBACKEND_Response* response, const char* name, const int64_t value)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  Status status = tr->AddParameter(name, value);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetBoolParameter(
    TRITONBACKEND_Response* response, const char* name, const bool value)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  Status status = tr->AddParameter(name, value);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseOutput(
    TRITONBACKEND_Response* response, TRITONBACKEND_Output** output,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  std::vector<int64_t> lshape(shape, shape + dims_count);
  InferenceResponse::Output* loutput;
  Status status = tr->AddOutput(
      name, TritonToDataType(datatype), std::move(lshape), &loutput);
  if (!status.IsOk()) {
    *output = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *output = reinterpret_cast<TRITONBACKEND_Output*>(loutput);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSend(
    TRITONBACKEND_Response* response, const uint32_t send_flags,
    TRITONSERVER_Error* error)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);

  Status status;

  std::unique_ptr<InferenceResponse> utr(tr);
  if (error == nullptr) {
    status = InferenceResponse::Send(std::move(utr), send_flags);
  } else {
    status = InferenceResponse::SendWithStatus(
        std::move(utr), send_flags,
        Status(
            TritonCodeToStatusCode(TRITONSERVER_ErrorCode(error)),
            TRITONSERVER_ErrorMessage(error)));
  }

  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  return nullptr;  // success
}
///
/// TRITONBACKEND_Input
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputProperties(
    TRITONBACKEND_Input* input, const char** name,
    TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  if (name != nullptr) {
    *name = ti->Name().c_str();
  }
  if (datatype != nullptr) {
    *datatype = DataTypeToTriton(ti->DType());
  }
  if (shape != nullptr) {
    *shape = ti->ShapeWithBatchDim().data();
  }
  if (dims_count != nullptr) {
    *dims_count = ti->ShapeWithBatchDim().size();
  }
  if (byte_size != nullptr) {
    *byte_size = ti->Data()->TotalByteSize();
  }
  if (buffer_count != nullptr) {
    *buffer_count = ti->DataBufferCount();
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputPropertiesForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  if (name != nullptr) {
    *name = ti->Name().c_str();
  }
  if (datatype != nullptr) {
    *datatype = DataTypeToTriton(ti->DType());
  }
  if (shape != nullptr) {
    *shape = ti->ShapeWithBatchDim().data();
  }
  if (dims_count != nullptr) {
    *dims_count = ti->ShapeWithBatchDim().size();
  }
  if (host_policy_name != nullptr) {
    if (byte_size != nullptr) {
      *byte_size = ti->Data(host_policy_name)->TotalByteSize();
    }
    if (buffer_count != nullptr) {
      *buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
    }
  } else {
    if (byte_size != nullptr) {
      *byte_size = ti->Data()->TotalByteSize();
    }
    if (buffer_count != nullptr) {
      *buffer_count = ti->DataBufferCount();
    }
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBuffer(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    uint64_t* buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  Status status = ti->DataBuffer(
      index, buffer, buffer_byte_size, memory_type, memory_type_id);
  if (!status.IsOk()) {
    *buffer = nullptr;
    *buffer_byte_size = 0;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBufferAttributes(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    TRITONSERVER_BufferAttributes** buffer_attributes)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  Status status = ti->DataBufferAttributes(
      index, buffer, reinterpret_cast<BufferAttributes**>(buffer_attributes));
  if (!status.IsOk()) {
    *buffer = nullptr;
    *buffer_attributes = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBufferForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const uint32_t index, const void** buffer, uint64_t* buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);

  Status status =
      (host_policy_name == nullptr)
          ? ti->DataBuffer(
                index, buffer, buffer_byte_size, memory_type, memory_type_id)
          : ti->DataBufferForHostPolicy(
                index, buffer, buffer_byte_size, memory_type, memory_type_id,
                host_policy_name);
  if (!status.IsOk()) {
    *buffer = nullptr;
    *buffer_byte_size = 0;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}
///
/// TRITONBACKEND_Output
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_OutputBuffer(
    TRITONBACKEND_Output* output, void** buffer,
    const uint64_t buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id)
{
  InferenceResponse::Output* to =
      reinterpret_cast<InferenceResponse::Output*>(output);
  Status status = to->AllocateDataBuffer(
      buffer, buffer_byte_size, memory_type, memory_type_id);
  if (!status.IsOk()) {
    *buffer = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_OutputBufferAttributes(
    TRITONBACKEND_Output* output,
    TRITONSERVER_BufferAttributes** buffer_attributes)
{
  InferenceResponse::Output* to =
      reinterpret_cast<InferenceResponse::Output*>(output);
  *buffer_attributes = reinterpret_cast<TRITONSERVER_BufferAttributes*>(
      to->GetBufferAttributes());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup(
    TRITONBACKEND_BackendAttribute* backend_attributes,
    const TRITONSERVER_InstanceGroupKind kind, const uint64_t count,
    const uint64_t* device_ids, const uint64_t id_count)
{
  auto ba = reinterpret_cast<TritonBackend::Attribute*>(backend_attributes);
  ba->preferred_groups_.emplace_back();
  auto& pg = ba->preferred_groups_.back();
  switch (kind) {
    case TRITONSERVER_INSTANCEGROUPKIND_AUTO:
      pg.set_kind(inference::ModelInstanceGroup::KIND_AUTO);
      break;
    case TRITONSERVER_INSTANCEGROUPKIND_CPU:
      pg.set_kind(inference::ModelInstanceGroup::KIND_CPU);
      break;
    case TRITONSERVER_INSTANCEGROUPKIND_GPU:
      pg.set_kind(inference::ModelInstanceGroup::KIND_GPU);
      break;
    case TRITONSERVER_INSTANCEGROUPKIND_MODEL:
      pg.set_kind(inference::ModelInstanceGroup::KIND_MODEL);
      break;
  }
  pg.set_count(count);
  if (device_ids != nullptr) {
    for (size_t i = 0; i < id_count; ++i) {
      pg.add_gpus(device_ids[i]);
    }
  }
  return nullptr;
}

}  // extern C

}}  // namespace triton::core
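To show how a backend consumes the TRITONBACKEND_Model* C API implemented above, here is a minimal sketch of a model-initialization hook that reads the model name and its serialized configuration. It is purely illustrative: error handling is reduced to early returns, and no real backend logic is implied.

#include <iostream>
#include <string>
#include "triton/core/tritonbackend.h"

extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  const char* name = nullptr;
  TRITONSERVER_Error* err = TRITONBACKEND_ModelName(model, &name);
  if (err != nullptr) {
    return err;
  }

  // Fetch the model configuration as a JSON message (config version 1).
  TRITONSERVER_Message* config_message = nullptr;
  err = TRITONBACKEND_ModelConfig(model, 1 /* config_version */, &config_message);
  if (err != nullptr) {
    return err;
  }

  const char* buffer = nullptr;
  size_t byte_size = 0;
  err = TRITONSERVER_MessageSerializeToJson(config_message, &buffer, &byte_size);
  if (err == nullptr) {
    std::cout << "initializing '" << name << "' with config: "
              << std::string(buffer, byte_size) << std::endl;
  }
  TRITONSERVER_MessageDelete(config_message);
  return err;  // nullptr on success
}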
3rdparty/core-r22.12/src/backend_model.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
#include <string>
#include "backend_manager.h"
#include "filesystem.h"
#include "infer_request.h"
#include "model.h"
#include "model_config.pb.h"
#include "status.h"
namespace triton { namespace core {

class InferenceServer;
class TritonModelInstance;

//
// Represents a model.
//
// Inheriting from Model to implement backend APIs
//
class TritonModel : public Model {
 public:
  static Status Create(
      InferenceServer* server, const std::string& model_path,
      const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
      const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
      const std::string& model_name, const int64_t version,
      inference::ModelConfig model_config, const bool is_config_provided,
      std::unique_ptr<TritonModel>* model);
  ~TritonModel();

  const std::string& LocalizedModelPath() const
  {
    return localized_model_dir_->Path();
  }

  InferenceServer* Server() { return server_; }
  bool AutoCompleteConfig() const { return auto_complete_config_; }

  Status UpdateModelConfig(
      const uint32_t config_version,
      TRITONSERVER_Message* updated_config_message);

  const std::shared_ptr<TritonBackend>& Backend() const { return backend_; }

  const std::vector<std::unique_ptr<TritonModelInstance>>& Instances() const
  {
    return instances_;
  }

  void* State() { return state_; }
  void SetState(void* state) { state_ = state; }

  Status AddInstance(
      std::unique_ptr<TritonModelInstance>&& instance, const bool passive);

 private:
  DISALLOW_COPY_AND_ASSIGN(TritonModel);

  TritonModel(
      InferenceServer* server,
      const std::shared_ptr<LocalizedPath>& localized_model_dir,
      const std::shared_ptr<TritonBackend>& backend,
      const double min_compute_capability, const int64_t version,
      const inference::ModelConfig& config, const bool auto_complete_config);

  // Set the scheduler based on the model configuration. The scheduler
  // can only be set once for a backend.
  Status SetConfiguredScheduler();

  // Merges the global backend configs with the specific
  // backend configs.
  static Status ResolveBackendConfigs(
      const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
      const std::string& backend_name,
      triton::common::BackendCmdlineConfig& config);

  // Sets defaults for some backend configurations when none are specified on
  // the command line.
  static Status SetBackendConfigDefaults(
      triton::common::BackendCmdlineConfig& config);

  Status Initialize();
  Status WarmUp();

  // The server object that owns this model. The model holds this as a
  // raw pointer because the lifetime of the server is guaranteed to
  // be longer than the lifetime of a model owned by the server.
  InferenceServer* server_;

  // The minimum supported compute capability on device.
  const double min_compute_capability_;

  // Whether the backend should attempt to auto-complete the model config.
  const bool auto_complete_config_;

  // The localized repo directory holding the model. If localization
  // required creation of a temporary local copy then that copy will
  // persist as long as this object is retained by this model.
  std::shared_ptr<LocalizedPath> localized_model_dir_;

  // Backend used by this model.
  std::shared_ptr<TritonBackend> backend_;

  // The model instances for this model.
  std::vector<std::unique_ptr<TritonModelInstance>> instances_;
  std::vector<std::unique_ptr<TritonModelInstance>> passive_instances_;

  // Opaque state associated with this model.
  void* state_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_model_instance.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_model_instance.h"
#ifndef _WIN32
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif
#include "backend_config.h"
#include "backend_model.h"
#include "cuda_utils.h"
#include "metrics.h"
#include "model_config.pb.h"
#include "numa_utils.h"
#include "server.h"
#include "shared_library.h"
#include "triton/common/logging.h"
#include "triton/common/nvtx.h"
#include "tritonserver_apis.h"
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

namespace {

// Utilities for warmup feature
TRITONSERVER_Error*
WarmupResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  *buffer = malloc(byte_size);
  if (*buffer != nullptr) {
    *actual_memory_type = TRITONSERVER_MEMORY_CPU;
    *actual_memory_type_id = 0;
    return nullptr;
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL,
      "failed to allocate output buffer for warmup.");
}

TRITONSERVER_Error*
WarmupResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  free(buffer);
  return nullptr;
}

ResponseAllocator warmup_allocator = ResponseAllocator(
    WarmupResponseAlloc, WarmupResponseRelease, nullptr /* start_fn */);

void
WarmupResponseComplete(
    TRITONSERVER_InferenceResponse* iresponse, const uint32_t flags,
    void* userp)
{
  auto res_pair = reinterpret_cast<
      std::pair<std::promise<void>, std::vector<std::string>*>*>(userp);
  if (iresponse != nullptr) {
    auto err = TRITONSERVER_InferenceResponseError(iresponse);
    if (err != nullptr) {
      // The error vector is shared by all requests in the batch for now
      static std::mutex res_mtx;
      {
        std::lock_guard<std::mutex> lk(res_mtx);
        res_pair->second->emplace_back(TRITONSERVER_ErrorMessage(err));
      }
      TRITONSERVER_ErrorDelete(err);
    }
    // Just delete the response, warmup doesn't check for correctness
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceResponseDelete(iresponse),
        "deleting warmup response");
  }
  // Last response
  if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0) {
    res_pair->first.set_value();
  }
}

void
WarmupRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  if ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0) {
    // Don't need to release request here, it is managed in WarmupData
    if (userp != nullptr) {
      auto warmup_promise = reinterpret_cast<std::promise<void>*>(userp);
      warmup_promise->set_value();
    }
  }
}

}  // namespace
TritonModelInstance::TritonModelInstance(
    TritonModel* model, const std::string& name, const size_t index,
    const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
    const std::vector<std::string>& profile_names, const bool passive,
    const triton::common::HostPolicyCmdlineConfig& host_policy,
    const TritonServerMessage& host_policy_message,
    const std::vector<SecondaryDevice>& secondary_devices)
    : model_(model), name_(name), index_(index), kind_(kind),
      device_id_(device_id), host_policy_(host_policy),
      host_policy_message_(host_policy_message), profile_names_(profile_names),
      passive_(passive), secondary_devices_(secondary_devices),
      state_(nullptr)
{
#ifdef TRITON_ENABLE_METRICS
  if (Metrics::Enabled()) {
    // Use an ID in the metric only for GPU instances. Otherwise use
    // METRIC_REPORTER_ID_CPU to indicate no device should be reported in the
    // metric.
    const int id = (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU)
                       ? device_id_
                       : METRIC_REPORTER_ID_CPU;
    MetricModelReporter::Create(
        model_->Name(), model_->Version(), id, model_->Config().metric_tags(),
        &reporter_);
  }
#endif  // TRITON_ENABLE_METRICS
}

TritonModelInstance::~TritonModelInstance()
{
  if (triton_backend_thread_.get() != nullptr) {
    triton_backend_thread_->StopBackendThread();
  }

  // Model finalization is optional...
  if (model_->Backend()->ModelInstanceFiniFn() != nullptr) {
    LOG_TRITONSERVER_ERROR(
        model_->Backend()->ModelInstanceFiniFn()(
            reinterpret_cast<TRITONBACKEND_ModelInstance*>(this)),
        "failed finalizing model instance");
  }
}
Status
TritonModelInstance::CreateInstances(
    TritonModel* model,
    const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
    const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
    const inference::ModelConfig& model_config, const bool device_blocking)
{
  static triton::common::HostPolicyCmdlineConfig empty_host_policy;

  // This structure is used to allocate a TritonBackendThread to instances on
  // the same device for the device-blocking execution policy.
  std::map<uint32_t, std::shared_ptr<TritonBackendThread>>
      device_to_thread_map;

  for (const auto& group : model_config.instance_group()) {
    std::vector<std::string> profile_names;
    for (const auto& profile_name : group.profile()) {
      profile_names.push_back(profile_name);
    }
    std::vector<SecondaryDevice> secondary_devices;
    for (const auto& secondary_device : group.secondary_devices()) {
      secondary_devices.emplace_back(
          inference::
              ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_Name(
                  secondary_device.kind()),
          secondary_device.device_id());
    }
    for (int32_t c = 0; c < group.count(); ++c) {
      std::string instance_name{
          group.count() > 1 ? group.name() + "_" + std::to_string(c)
                            : group.name()};
      const bool passive = group.passive();
      std::vector<std::tuple<
          std::string, TRITONSERVER_InstanceGroupKind, int32_t,
          const inference::ModelRateLimiter*>>
          instance_setting;
      if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) {
        instance_setting.emplace_back(
            group.host_policy().empty() ? "cpu" : group.host_policy(),
            TRITONSERVER_INSTANCEGROUPKIND_CPU, 0 /* device_id */,
            &group.rate_limiter());
      } else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) {
        for (const int32_t device_id : group.gpus()) {
          instance_setting.emplace_back(
              group.host_policy().empty()
                  ? ("gpu_" + std::to_string(device_id))
                  : group.host_policy(),
              TRITONSERVER_INSTANCEGROUPKIND_GPU, device_id,
              &group.rate_limiter());
        }
      } else if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) {
        instance_setting.emplace_back(
            group.host_policy().empty() ? "model" : group.host_policy(),
            TRITONSERVER_INSTANCEGROUPKIND_MODEL, 0 /* device_id */,
            &group.rate_limiter());
      } else {
        return Status(
            Status::Code::INVALID_ARG,
            std::string("instance_group kind ") +
                ModelInstanceGroup_Kind_Name(group.kind()) + " not supported");
      }
      for (const auto is : instance_setting) {
        const auto& kind = std::get<1>(is);
        const auto& id = std::get<2>(is);
        const std::string& policy_name = std::get<0>(is);
        const triton::common::HostPolicyCmdlineConfig* host_policy;
        const auto policy_it = host_policy_map.find(policy_name);
        if (policy_it != host_policy_map.end()) {
          host_policy = &policy_it->second;
        } else {
          host_policy = &empty_host_policy;
        }

        RETURN_IF_ERROR(SetNumaConfigOnThread(*host_policy));
        auto err = CreateInstance(
            model, instance_name, c, kind, id, profile_names, passive,
            policy_name, *host_policy, *(std::get<3>(is)), device_blocking,
            &device_to_thread_map, secondary_devices);
        RETURN_IF_ERROR(ResetNumaMemoryPolicy());
        RETURN_IF_ERROR(err);

        // When deploying on GPU, we want to make sure the GPU memory usage
        // is within the allowed range; otherwise, stop the creation to ensure
        // there is sufficient GPU memory for other use.
        // We check the usage after loading the instance to better enforce
        // the limit. If we checked before loading, we might create an
        // instance that occupies the rest of the available memory, which
        // works against the purpose of the limit.
        if (kind == TRITONSERVER_INSTANCEGROUPKIND_GPU) {
          size_t free, total;
          double memory_limit;
          RETURN_IF_ERROR(GetDeviceMemoryInfo(id, &free, &total));
          RETURN_IF_ERROR(BackendConfigurationModelLoadGpuFraction(
              backend_cmdline_config_map, id, &memory_limit));
          const size_t allow = total * memory_limit;
          const size_t used = total - free;
          if (used > allow) {
            return Status(
                Status::Code::UNAVAILABLE,
                std::string("can not create model '") + instance_name +
                    "': memory limit set for " +
                    TRITONSERVER_InstanceGroupKindString(kind) + " " +
                    std::to_string(id) +
                    " has exceeded, model loading is rejected.");
          }
        }
      }
    }
  }

  return Status::Success;
}
Status
TritonModelInstance::CreateInstance(
    TritonModel* model, const std::string& name, const size_t index,
    const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
    const std::vector<std::string>& profile_names, const bool passive,
    const std::string& host_policy_name,
    const triton::common::HostPolicyCmdlineConfig& host_policy,
    const inference::ModelRateLimiter& rate_limiter_config,
    const bool device_blocking,
    std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
        device_to_thread_map,
    const std::vector<SecondaryDevice>& secondary_devices)
{
  // Create the JSON representation of the backend configuration.
  triton::common::TritonJson::Value host_policy_json(
      triton::common::TritonJson::ValueType::OBJECT);
  triton::common::TritonJson::Value policy_setting_json(
      host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
  for (const auto& pr : host_policy) {
    RETURN_IF_ERROR(
        policy_setting_json.AddString(pr.first.c_str(), pr.second));
  }

  RETURN_IF_ERROR(host_policy_json.Add(
      host_policy_name.c_str(), std::move(policy_setting_json)));
  TritonServerMessage host_policy_message(host_policy_json);

  std::unique_ptr<TritonModelInstance> local_instance(new TritonModelInstance(
      model, name, index, kind, device_id, profile_names, passive, host_policy,
      host_policy_message, secondary_devices));

  TRITONBACKEND_ModelInstance* triton_instance =
      reinterpret_cast<TRITONBACKEND_ModelInstance*>(local_instance.get());

  // Instance initialization is optional... We must set the shared
  // library path to point to the backend directory in case the
  // backend library attempts to load additional shared libraries.
  if (model->Backend()->ModelInstanceInitFn() != nullptr) {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));
    RETURN_IF_ERROR(slib->SetLibraryDirectory(model->Backend()->Directory()));

    TRITONSERVER_Error* err =
        model->Backend()->ModelInstanceInitFn()(triton_instance);

    RETURN_IF_ERROR(slib->ResetLibraryDirectory());
    RETURN_IF_TRITONSERVER_ERROR(err);
  }

  if (!passive) {
    RETURN_IF_ERROR(local_instance->GenerateWarmupData());
    RETURN_IF_ERROR(model->Server()->GetRateLimiter()->RegisterModelInstance(
        local_instance.get(), rate_limiter_config));
    RETURN_IF_ERROR(local_instance->SetBackendThread(
        kind, device_id, device_blocking, device_to_thread_map));
  }

  RETURN_IF_ERROR(model->AddInstance(std::move(local_instance), passive));

  return Status::Success;
}
Status
TritonModelInstance::SetBackendThread(
    const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
    const bool device_blocking,
    std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
        device_to_thread_map)
{
  if (device_blocking && (kind == TRITONSERVER_INSTANCEGROUPKIND_GPU)) {
    auto thread_it = device_to_thread_map->find(device_id);
    if (thread_it != device_to_thread_map->end()) {
      LOG_VERBOSE(1) << "Using already started backend thread for " << Name()
                     << " on device " << device_id;
      triton_backend_thread_ = thread_it->second;
    }
  }
  if (triton_backend_thread_.get() == nullptr) {
    std::unique_ptr<TritonBackendThread> local_backend_thread;
    RETURN_IF_ERROR(TritonBackendThread::CreateBackendThread(
        Name(), this, 0 /* nice */, device_id, &local_backend_thread));
    triton_backend_thread_ = std::move(local_backend_thread);
    device_to_thread_map->insert({device_id, triton_backend_thread_});
  } else {
    triton_backend_thread_->AddModelInstance(this);
  }
  RETURN_IF_ERROR(triton_backend_thread_->InitAndWarmUpModelInstance(this));

  return Status::Success;
}
Status
TritonModelInstance::GenerateWarmupData()
{
  warmup_samples_.clear();
  for (const auto& warmup_setting : model_->Config().model_warmup()) {
    if (warmup_setting.batch_size() == 0) {
      LOG_VERBOSE(1) << "Skipping batch 0 warmup sample '"
                     << warmup_setting.name() << "'";
      continue;
    }
    LOG_VERBOSE(1) << "Generating warmup sample data for '"
                   << warmup_setting.name() << "'";

    // Two passes. First pass to get max byte size for synthetic
    // data. Second pass to add original inputs and override inputs
    // for control inputs.
    int64_t max_zero_byte_size = 0;
    int64_t max_random_byte_size = 0;
    for (const auto& input_meta : warmup_setting.inputs()) {
      auto element_count =
          triton::common::GetElementCount(input_meta.second.dims());
      if (element_count == -1) {
        return Status(
            Status::Code::INVALID_ARG,
            "warmup setting expects all variable-size dimensions are "
            "specified for input '" +
                input_meta.first + "'");
      }

      int64_t batch_byte_size =
          element_count *
          triton::common::GetDataTypeByteSize(input_meta.second.data_type());
      if (batch_byte_size == 0) {
        batch_byte_size = element_count * sizeof(int32_t);
      }

      switch (input_meta.second.input_data_type_case()) {
        case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData:
          max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size);
          break;
        case inference::ModelWarmup_Input::InputDataTypeCase::kRandomData: {
          // Because Triton expects STRING type to be in special format
          // (prepend 4 bytes to specify string length), so using zero data
          // for simplicity (4 bytes * element count of zeros).
          if (input_meta.second.data_type() ==
              inference::DataType::TYPE_STRING) {
            max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size);
          } else {
            max_random_byte_size =
                std::max(batch_byte_size, max_random_byte_size);
          }
          break;
        }
        default:
          break;
      }
    }

    warmup_samples_.emplace_back(
        warmup_setting.name(), warmup_setting.count());
    auto& warmup_data = warmup_samples_.back();
    // Create buffers for synthetic data
    TRITONSERVER_MemoryType type;
    int64_t type_id;
    warmup_data.zero_data_.reset(new AllocatedMemory(
        max_zero_byte_size, TRITONSERVER_MEMORY_CPU_PINNED /* memory_type */,
        0 /* memory_type_id */));
    char* zero_buffer = warmup_data.zero_data_->MutableBuffer(&type, &type_id);
    memset(zero_buffer, 0, max_zero_byte_size);

    warmup_data.random_data_.reset(new AllocatedMemory(
        max_random_byte_size, TRITONSERVER_MEMORY_CPU_PINNED /* memory_type */,
        0 /* memory_type_id */));
    char* random_buffer =
        warmup_data.random_data_->MutableBuffer(&type, &type_id);
    for (int64_t offset = 0; offset < max_random_byte_size; offset++) {
      random_buffer[offset] = rand();
    }

    // Prepare the inference request for the specified sample. We are not
    // using the in-process C API because the request doesn't go through the
    // same pipeline (i.e. no normalization / scheduler), so we need to
    // prepare the request to the state just before calling the instance
    // execute function.
    for (size_t cnt = 0; cnt < warmup_setting.batch_size(); cnt++) {
      warmup_data.requests_.emplace_back(
          new InferenceRequest(model_, model_->Version()));
      auto& lrequest = warmup_data.requests_.back();

      // Second pass to prepare original inputs.
      std::vector<std::shared_ptr<InferenceRequest::Input>> input_sps;
      for (const auto& input_meta : warmup_setting.inputs()) {
        auto batch1_element_count =
            triton::common::GetElementCount(input_meta.second.dims());
        auto batch_byte_size =
            batch1_element_count *
            triton::common::GetDataTypeByteSize(input_meta.second.data_type());
        if (batch_byte_size == 0) {
          batch_byte_size = batch1_element_count * sizeof(int32_t);
        }

        const char* allocated_ptr;
        switch (input_meta.second.input_data_type_case()) {
          case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData:
            allocated_ptr = zero_buffer;
            break;
          case inference::ModelWarmup_Input::InputDataTypeCase::kRandomData: {
            if (input_meta.second.data_type() ==
                inference::DataType::TYPE_STRING) {
              allocated_ptr = zero_buffer;
            } else {
              allocated_ptr = random_buffer;
            }
            break;
          }
          case inference::ModelWarmup_Input::InputDataTypeCase::
              kInputDataFile: {
            // For data provided from file, we can set buffer in first pass
            warmup_data.provided_data_.emplace_back(new std::string());
            auto input_data = warmup_data.provided_data_.back().get();
            RETURN_IF_ERROR(ReadTextFile(
                JoinPath({model_->LocalizedModelPath(), kWarmupDataFolder,
                          input_meta.second.input_data_file()}),
                input_data));
            if (input_meta.second.data_type() ==
                inference::DataType::TYPE_STRING) {
              batch_byte_size = input_data->size();
            } else if (((size_t)batch_byte_size) > input_data->size()) {
              return Status(
                  Status::Code::INVALID_ARG,
                  lrequest->LogRequest() + "warmup setting expects " +
                      std::to_string(batch_byte_size) +
                      " bytes, but the data provided from " +
                      input_meta.second.input_data_file() + " only has " +
                      std::to_string(input_data->size()) + " bytes");
            }
            allocated_ptr = input_data->data();
            break;
          }
          default:
            return Status(
                Status::Code::INVALID_ARG,
                lrequest->LogRequest() + "warmup setting expects input '" +
                    input_meta.first + "' to have input_data_type set");
        }

        const inference::ModelInput* input_config;
        bool is_original_input =
            model_->GetInput(input_meta.first, &input_config).IsOk();
        InferenceRequest::Input* input = nullptr;
        std::vector<int64_t> input_meta_shape;
        // Append batch size only if the model supports batching
        // and this is not a control input.
        if ((model_->Config().max_batch_size() != 0) && is_original_input) {
          input_meta_shape.push_back(1);
        }
        for (auto d : input_meta.second.dims()) {
          input_meta_shape.push_back(d);
        }
        if (is_original_input) {
          RETURN_IF_ERROR(lrequest->AddOriginalInput(
              input_meta.first, input_meta.second.data_type(),
              input_meta_shape, &input));
        } else {
          input_sps.emplace_back();
          RETURN_IF_ERROR(lrequest->AddOverrideInput(
              input_meta.first, input_meta.second.data_type(),
              (model_->Config().max_batch_size() != 0 ? 1 : 0),
              input_meta_shape, &input_sps.back()));
          input = input_sps.back().get();
        }
        RETURN_IF_ERROR(input->AppendData(
            allocated_ptr, batch_byte_size,
            TRITONSERVER_MEMORY_CPU /* memory_type */,
            0 /* memory_type_id */));
      }

      RETURN_IF_ERROR(lrequest->PrepareForInference());
      // Override inputs must be added after PrepareForInference() is called
      for (const auto& sp : input_sps) {
        RETURN_IF_ERROR(lrequest->AddOverrideInput(sp));
      }
    }
  }

  return Status::Success;
}
void
TritonModelInstance::Schedule(
    std::vector<std::unique_ptr<InferenceRequest>>&& requests,
    const std::function<void()>& OnCompletion)
{
  // Use a thread local vector to avoid needing to malloc each
  // time an inference is run.
  thread_local std::vector<TRITONBACKEND_Request*> triton_requests(1024);
  triton_requests.clear();
  for (auto& r : requests) {
    // Load the input states for the inference request.
    r->LoadInputStates();
    triton_requests.push_back(
        reinterpret_cast<TRITONBACKEND_Request*>(r.release()));
  }

  Execute(triton_requests);
  OnCompletion();
}

Status
TritonModelInstance::Initialize()
{
  RETURN_IF_ERROR(SetNumaConfigOnThread(HostPolicy()));
  return Status::Success;
}
Status
TritonModelInstance::WarmUp()
{
  // move samples to local variable for scoped cleanup
  std::vector<triton::core::TritonModelInstance::WarmupData> lwarmup_samples;
  lwarmup_samples.swap(warmup_samples_);

  for (auto& sample : lwarmup_samples) {
    for (size_t iteration = 1; iteration <= sample.count_; ++iteration) {
      LOG_VERBOSE(1) << "model '" << sample.requests_.back()->ModelName()
                     << "' instance " << Name()
                     << " is running warmup sample '" << sample.sample_name_
                     << "' for iteration " << iteration;

      // request/response complete is asynchronous so use promise to wait for
      // completion. Also collect error messages from the responses in a
      // vector.
      std::vector<std::promise<void>> request_complete(
          sample.requests_.size());
      std::vector<std::string> response_errors;
      std::vector<std::pair<std::promise<void>, std::vector<std::string>*>>
          response_complete(sample.requests_.size());

      std::vector<TRITONBACKEND_Request*> triton_requests;
      for (size_t i = 0; i < sample.requests_.size(); ++i) {
        auto& request = sample.requests_[i];
        request->SetReleaseCallback(
            WarmupRequestComplete, &request_complete[i]);
        response_complete[i].second = &response_errors;
        request->SetResponseCallback(
            &warmup_allocator, nullptr, WarmupResponseComplete,
            &response_complete[i]);
        // Capture timestamp before run to avoid incorrect accumulation from
        // sequential warmup runs
#ifdef TRITON_ENABLE_STATS
        request->CaptureRequestStartNs();
#endif  // TRITON_ENABLE_STATS
        request->CaptureQueueStartNs();
        triton_requests.push_back(
            reinterpret_cast<TRITONBACKEND_Request*>(request.get()));
      }

      Execute(triton_requests);

      // Wait for warmup sample to complete and check error
      for (size_t i = 0; i < sample.requests_.size(); ++i) {
        request_complete[i].get_future().get();
        response_complete[i].first.get_future().get();
      }
      if (response_errors.size() != 0) {
        std::string err_str =
            "failed to run warmup sample '" + sample.sample_name_ + "': ";
        for (const auto& error : response_errors) {
          err_str += (error + "; ");
        }
        // End warmup as soon as there is a failing sample
        LOG_VERBOSE(1) << "model '" << sample.requests_.back()->ModelName()
                       << "' instance " << Name()
                       << " failed to run warmup sample '"
                       << sample.sample_name_ << "'";
        return Status(Status::Code::INVALID_ARG, err_str);
      }
    }
  }

  return Status::Success;
}
void
TritonModelInstance::Execute(
    std::vector<TRITONBACKEND_Request*>& triton_requests)
{
  TRITONBACKEND_ModelInstance* triton_model_instance =
      reinterpret_cast<TRITONBACKEND_ModelInstance*>(this);
  TritonBackend::TritonModelInstanceExecFn_t inst_exec_fn =
      model_->Backend()->ModelInstanceExecFn();

  // If there is an error then we retain ownership of 'requests'
  // and must send error responses.
  TRITONSERVER_Error* err = inst_exec_fn(
      triton_model_instance, &triton_requests[0], triton_requests.size());
  if (err != nullptr) {
    Status status = Status(
        TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)),
        TRITONSERVER_ErrorMessage(err));
    for (TRITONBACKEND_Request* tr : triton_requests) {
      std::unique_ptr<InferenceRequest> ur(
          reinterpret_cast<InferenceRequest*>(tr));
      InferenceRequest::RespondIfError(
          ur, status, true /* release_requests */);
    }

    TRITONSERVER_ErrorDelete(err);
  }
}
Status
TritonModelInstance::TritonBackendThread::CreateBackendThread(
    const std::string name, TritonModelInstance* model_instance,
    const int nice, const int32_t device_id,
    std::unique_ptr<TritonBackendThread>* triton_backend_thread)
{
  TritonBackendThread* raw_triton_backend_thread =
      new TritonBackendThread(name, model_instance->Model());
  std::unique_ptr<TritonBackendThread> runner(raw_triton_backend_thread);
  runner->AddModelInstance(model_instance);

  runner->backend_thread_ =
      std::thread([raw_triton_backend_thread, nice, device_id]() {
        raw_triton_backend_thread->BackendThread(nice, device_id);
      });

  triton_backend_thread->reset(runner.release());

  return Status::Success;
}

void
TritonModelInstance::TritonBackendThread::AddModelInstance(
    TritonModelInstance* model_instance)
{
  model_instances_.push_back(model_instance);
}

Status
TritonModelInstance::TritonBackendThread::InitAndWarmUpModelInstance(
    TritonModelInstance* model_instance)
{
  // Initialize the instance on the backend thread
  auto init_payload = model_->Server()->GetRateLimiter()->GetPayload(
      Payload::Operation::INIT, model_instance);
  RETURN_IF_ERROR(model_->Server()->GetRateLimiter()->EnqueuePayload(
      model_, init_payload));
  RETURN_IF_ERROR(init_payload->Wait());

  // Warm-up the instance on the backend thread
  auto warmup_payload = model_->Server()->GetRateLimiter()->GetPayload(
      Payload::Operation::WARM_UP, model_instance);
  RETURN_IF_ERROR(model_->Server()->GetRateLimiter()->EnqueuePayload(
      model_, warmup_payload));
  RETURN_IF_ERROR(warmup_payload->Wait());

  return Status::Success;
}

TritonModelInstance::TritonBackendThread::TritonBackendThread(
    const std::string& name, TritonModel* model)
    : name_(name), model_(model)
{
}

TritonModelInstance::TritonBackendThread::~TritonBackendThread()
{
  StopBackendThread();
}

void
TritonModelInstance::TritonBackendThread::StopBackendThread()
{
  if (backend_thread_.joinable()) {
    // Signal the backend thread to exit and then wait for it...
    auto exit_payload = model_->Server()->GetRateLimiter()->GetPayload(
        Payload::Operation::EXIT, model_instances_.back());
    model_->Server()->GetRateLimiter()->EnqueuePayload(model_, exit_payload);
    backend_thread_.join();
  }
}

void
TritonModelInstance::TritonBackendThread::BackendThread(
    const int nice, const int32_t device_id)
{
#ifndef _WIN32
  if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) {
    LOG_VERBOSE(1) << "Starting backend thread for " << name_ << " at nice "
                   << nice << " on device " << device_id << "...";
  } else {
    LOG_VERBOSE(1) << "Starting backend thread for " << name_
                   << " at default nice (requested nice " << nice
                   << " failed)" << " on device " << device_id << "...";
  }
#else
  LOG_VERBOSE(1) << "Starting backend thread for " << name_
                 << " at default nice on device " << device_id << "...";
#endif

  bool should_exit = false;
  while (!should_exit) {
    std::shared_ptr<Payload> payload;
    model_->Server()->GetRateLimiter()->DequeuePayload(
        model_instances_, &payload);
    NVTX_RANGE(nvtx_, "BackendThread " + name_);
    payload->Execute(&should_exit);
    model_instances_.push_back(payload->GetInstance());
    // Release the payload to the RateLimiter
    model_->Server()->GetRateLimiter()->PayloadRelease(payload);
  }
  LOG_VERBOSE(1) << "Stopping backend thread for " << name_ << "...";
}
extern "C" {

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceName(
    TRITONBACKEND_ModelInstance* instance, const char** name)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *name = ti->Name().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceKind(
    TRITONBACKEND_ModelInstance* instance,
    TRITONSERVER_InstanceGroupKind* kind)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *kind = ti->Kind();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceDeviceId(
    TRITONBACKEND_ModelInstance* instance, int32_t* device_id)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *device_id = ti->DeviceId();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceHostPolicy(
    TRITONBACKEND_ModelInstance* instance, TRITONSERVER_Message** host_policy)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *host_policy = const_cast<TRITONSERVER_Message*>(
      reinterpret_cast<const TRITONSERVER_Message*>(&ti->HostPolicyMessage()));
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *count = ti->Profiles().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileName(
    TRITONBACKEND_ModelInstance* instance, const uint32_t index,
    const char** profile_name)
{
  *profile_name = nullptr;

  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  const auto& rprofiles = ti->Profiles();
  if (index >= rprofiles.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("out of bounds index ") + std::to_string(index) +
         ": instance is configured with " + std::to_string(rprofiles.size()) +
         " profiles")
            .c_str());
  }

  *profile_name = rprofiles[index].c_str();

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *count = ti->SecondaryDevices().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
    TRITONBACKEND_ModelInstance* instance, uint32_t index, const char** kind,
    int64_t* id)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  const auto& rsecondarydevices = ti->SecondaryDevices();

  if (index >= rsecondarydevices.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("out of bounds index ") + std::to_string(index) +
         ": instance is configured with " +
         std::to_string(rsecondarydevices.size()) + " secondary devices")
            .c_str());
  }

  *kind = rsecondarydevices[index].kind_.c_str();
  *id = rsecondarydevices[index].id_;

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceIsPassive(
    TRITONBACKEND_ModelInstance* instance, bool* is_passive)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *is_passive = ti->IsPassive();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceModel(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Model** model)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *model = reinterpret_cast<TRITONBACKEND_Model*>(ti->Model());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceState(
    TRITONBACKEND_ModelInstance* instance, void** state)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *state = ti->State();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSetState(
    TRITONBACKEND_ModelInstance* instance, void* state)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  ti->SetState(state);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportStatistics(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request* request,
    const bool success, const uint64_t exec_start_ns,
    const uint64_t compute_start_ns, const uint64_t compute_end_ns,
    const uint64_t exec_end_ns)
{
#ifdef TRITON_ENABLE_STATS
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  tr->ReportStatistics(
      ti->MetricReporter(), success, exec_start_ns, compute_start_ns,
      compute_end_ns, exec_end_ns);
#endif  // TRITON_ENABLE_STATS

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportBatchStatistics(
    TRITONBACKEND_ModelInstance* instance, const uint64_t batch_size,
    const uint64_t exec_start_ns, const uint64_t compute_start_ns,
    const uint64_t compute_end_ns, const uint64_t exec_end_ns)
{
#ifdef TRITON_ENABLE_STATS
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  ti->Model()->MutableStatsAggregator()->UpdateInferBatchStats(
      ti->MetricReporter(), batch_size, exec_start_ns, compute_start_ns,
      compute_end_ns, exec_end_ns);
#endif  // TRITON_ENABLE_STATS

  return nullptr;  // success
}

}  // extern C

}}  // namespace triton::core
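The warmup path above relies on a simple completion pattern: WarmupRequestComplete and WarmupResponseComplete receive a void* userp that points at a std::promise<void> (or a pair containing one), and WarmUp() blocks on the matching futures. A self-contained sketch of that same idea, independent of the Triton types above and purely illustrative:

#include <future>
#include <thread>

// userp points at a std::promise<void> owned by the caller.
void OnComplete(void* userp)
{
  reinterpret_cast<std::promise<void>*>(userp)->set_value();
}

int main()
{
  std::promise<void> done;
  // Simulate an asynchronous completion callback firing on another thread.
  std::thread worker([&done]() { OnComplete(&done); });
  done.get_future().get();  // block until the callback has run
  worker.join();
  return 0;
}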
3rdparty/core-r22.12/src/backend_model_instance.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <functional>
#include <future>
#include <memory>
#include <string>
#include <thread>
#include "constants.h"
#include "memory.h"
#include "metric_model_reporter.h"
#include "model_config.pb.h"
#include "server_message.h"
#include "status.h"
#include "triton/common/sync_queue.h"
namespace triton { namespace core {

class TritonModel;
class InferenceRequest;

//
// Represents a model instance.
//
class TritonModelInstance {
 public:
  static Status CreateInstances(
      TritonModel* model,
      const triton::common::BackendCmdlineConfigMap&
          backend_cmdline_config_map,
      const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
      const inference::ModelConfig& model_config, const bool device_blocking);
  ~TritonModelInstance();

  const std::string& Name() const { return name_; }
  size_t Index() const { return index_; }
  TRITONSERVER_InstanceGroupKind Kind() const { return kind_; }
  int32_t DeviceId() const { return device_id_; }
  const triton::common::HostPolicyCmdlineConfig& HostPolicy() const
  {
    return host_policy_;
  }
  const TritonServerMessage& HostPolicyMessage() const
  {
    return host_policy_message_;
  }
  bool IsPassive() const { return passive_; }
  const std::vector<std::string>& Profiles() const { return profile_names_; }

  struct SecondaryDevice {
    SecondaryDevice(const std::string kind, const int64_t id)
        : kind_(kind), id_(id)
    {
    }
    const std::string kind_;
    const int64_t id_;
  };
  const std::vector<SecondaryDevice>& SecondaryDevices() const
  {
    return secondary_devices_;
  }

  Status Initialize();
  Status WarmUp();
  void Schedule(
      std::vector<std::unique_ptr<InferenceRequest>>&& requests,
      const std::function<void()>& OnCompletion);

  TritonModel* Model() const { return model_; }

  void* State() { return state_; }
  void SetState(void* state) { state_ = state; }

  MetricModelReporter* MetricReporter() const { return reporter_.get(); }

 private:
  DISALLOW_COPY_AND_ASSIGN(TritonModelInstance);

  class TritonBackendThread;

  TritonModelInstance(
      TritonModel* model, const std::string& name, const size_t index,
      const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
      const std::vector<std::string>& profile_names, const bool passive,
      const triton::common::HostPolicyCmdlineConfig& host_policy,
      const TritonServerMessage& host_policy_message,
      const std::vector<SecondaryDevice>& secondary_devices);

  static Status CreateInstance(
      TritonModel* model, const std::string& name, const size_t index,
      const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
      const std::vector<std::string>& profile_names, const bool passive,
      const std::string& host_policy_name,
      const triton::common::HostPolicyCmdlineConfig& host_policy,
      const inference::ModelRateLimiter& rate_limiter_config,
      const bool device_blocking,
      std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
          device_to_thread_map,
      const std::vector<SecondaryDevice>& secondary_devices);

  Status SetBackendThread(
      const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
      const bool device_blocking,
      std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
          device_to_thread_map);
  Status GenerateWarmupData();

  void Execute(std::vector<TRITONBACKEND_Request*>& triton_requests);

  class TritonBackendThread {
   public:
    static Status CreateBackendThread(
        const std::string name, TritonModelInstance* model, const int nice,
        const int32_t device_id,
        std::unique_ptr<TritonBackendThread>* triton_backend_thread);
    void AddModelInstance(TritonModelInstance* model_instance);
    Status InitAndWarmUpModelInstance(TritonModelInstance* model_instance);
    void StopBackendThread();
    ~TritonBackendThread();

   private:
    TritonBackendThread(const std::string& name, TritonModel* model);
    void BackendThread(const int nice, const int32_t device_id);

    std::string name_;

    TritonModel* model_;
    std::deque<TritonModelInstance*> model_instances_;

    std::thread backend_thread_;
    std::atomic<bool> backend_thread_exit_;
  };
  std::shared_ptr<TritonBackendThread> triton_backend_thread_;

  struct WarmupData {
    WarmupData(const std::string& sample_name, const size_t count)
        : sample_name_(sample_name), count_(std::max(count, size_t{1}))
    {
    }

    std::string sample_name_;
    size_t count_;
    // Using a batch of requests to satisfy batch size, this provides better
    // alignment on the batch expected by the model, especially for sequence
    // model.
    std::vector<std::unique_ptr<InferenceRequest>> requests_;

    // Placeholder for input data
    std::unique_ptr<AllocatedMemory> zero_data_;
    std::unique_ptr<AllocatedMemory> random_data_;
    std::vector<std::unique_ptr<std::string>> provided_data_;
  };
  std::vector<WarmupData> warmup_samples_;

  // The TritonModel object that owns this instance. The instance
  // holds this as a raw pointer because the lifetime of the model is
  // guaranteed to be longer than the lifetime of an instance owned by the
  // model.
  TritonModel* model_;

  std::string name_;
  size_t index_;

  // For CPU device_id_ is always 0. For GPU device_id_ indicates the
  // GPU device to be used by the instance.
  TRITONSERVER_InstanceGroupKind kind_;
  int32_t device_id_;
  const triton::common::HostPolicyCmdlineConfig host_policy_;
  TritonServerMessage host_policy_message_;
  std::vector<std::string> profile_names_;
  bool passive_;
  std::vector<SecondaryDevice> secondary_devices_;

  // Reporter for metrics, or nullptr if no metrics should be reported
  std::shared_ptr<MetricModelReporter> reporter_;

  // Opaque state associated with this model instance.
  void* state_;
};

}}  // namespace triton::core
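A backend typically pairs the State()/SetState() accessors declared here with the TRITONBACKEND_ModelInstanceSetState / TRITONBACKEND_ModelInstanceState entry points defined in backend_model_instance.cc above. A hedged sketch of that backend-side usage; ModelInstanceState is a hypothetical backend-defined type, not part of the files shown here:

// Hypothetical backend-side per-instance state.
struct ModelInstanceState {
  int some_handle = 0;
};

TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  auto* state = new ModelInstanceState();
  // Attach backend-owned state to the instance; Triton stores the raw pointer.
  TRITONSERVER_Error* err = TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(state));
  if (err != nullptr) {
    delete state;
    return err;
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* vstate;
  TRITONSERVER_Error* err = TRITONBACKEND_ModelInstanceState(instance, &vstate);
  if (err != nullptr) {
    return err;
  }
  delete reinterpret_cast<ModelInstanceState*>(vstate);
  return nullptr;  // success
}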
3rdparty/core-r22.12/src/buffer_attributes.cc deleted 100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "buffer_attributes.h"
#include <cstring>
#include "constants.h"
namespace triton { namespace core {

void
BufferAttributes::SetByteSize(const size_t& byte_size)
{
  byte_size_ = byte_size;
}

void
BufferAttributes::SetMemoryType(const TRITONSERVER_MemoryType& memory_type)
{
  memory_type_ = memory_type;
}

void
BufferAttributes::SetMemoryTypeId(const int64_t& memory_type_id)
{
  memory_type_id_ = memory_type_id;
}

void
BufferAttributes::SetCudaIpcHandle(void* cuda_ipc_handle)
{
  char* lcuda_ipc_handle = reinterpret_cast<char*>(cuda_ipc_handle);
  cuda_ipc_handle_.clear();
  std::copy(
      lcuda_ipc_handle, lcuda_ipc_handle + CUDA_IPC_STRUCT_SIZE,
      std::back_inserter(cuda_ipc_handle_));
}

void*
BufferAttributes::CudaIpcHandle()
{
  if (cuda_ipc_handle_.empty()) {
    return nullptr;
  } else {
    return reinterpret_cast<void*>(cuda_ipc_handle_.data());
  }
}

size_t
BufferAttributes::ByteSize() const
{
  return byte_size_;
}

TRITONSERVER_MemoryType
BufferAttributes::MemoryType() const
{
  return memory_type_;
}

int64_t
BufferAttributes::MemoryTypeId() const
{
  return memory_type_id_;
}

BufferAttributes::BufferAttributes(
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, char* cuda_ipc_handle)
    : byte_size_(byte_size), memory_type_(memory_type),
      memory_type_id_(memory_type_id)
{
  // cuda ipc handle size
  cuda_ipc_handle_.reserve(CUDA_IPC_STRUCT_SIZE);

  if (cuda_ipc_handle != nullptr) {
    std::copy(
        cuda_ipc_handle, cuda_ipc_handle + CUDA_IPC_STRUCT_SIZE,
        std::back_inserter(cuda_ipc_handle_));
  }
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/buffer_attributes.h deleted 100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <iterator>
#include <vector>
#include "tritonserver_apis.h"
#pragma once
namespace triton { namespace core {

//
// A class to hold information about the buffer allocation.
//
class BufferAttributes {
 public:
  BufferAttributes(
      size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id, char cuda_ipc_handle[64]);
  BufferAttributes()
  {
    memory_type_ = TRITONSERVER_MEMORY_CPU;
    memory_type_id_ = 0;
    cuda_ipc_handle_.reserve(64);
  }

  // Set the buffer byte size
  void SetByteSize(const size_t& byte_size);

  // Set the buffer memory_type
  void SetMemoryType(const TRITONSERVER_MemoryType& memory_type);

  // Set the buffer memory type id
  void SetMemoryTypeId(const int64_t& memory_type_id);

  // Set the cuda ipc handle
  void SetCudaIpcHandle(void* cuda_ipc_handle);

  // Get the cuda ipc handle
  void* CudaIpcHandle();

  // Get the byte size
  size_t ByteSize() const;

  // Get the memory type
  TRITONSERVER_MemoryType MemoryType() const;

  // Get the memory type id
  int64_t MemoryTypeId() const;

 private:
  size_t byte_size_;
  TRITONSERVER_MemoryType memory_type_;
  int64_t memory_type_id_;
  std::vector<char> cuda_ipc_handle_;
};

}}  // namespace triton::core
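As a usage note, the setters and getters above are plain value accessors, so describing a buffer only takes a few calls. A small illustrative fragment (the 1024-byte size is an arbitrary example, not taken from the sources):

triton::core::BufferAttributes attrs;            // defaults to CPU, id 0
attrs.SetByteSize(1024);                         // arbitrary example size
attrs.SetMemoryType(TRITONSERVER_MEMORY_CPU_PINNED);
attrs.SetMemoryTypeId(0);
// No CUDA IPC handle was set, so CudaIpcHandle() returns nullptr.
bool has_ipc = (attrs.CudaIpcHandle() != nullptr);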
3rdparty/core-r22.12/src/constants.h deleted 100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stdint.h>
namespace triton { namespace core {

constexpr char kInferHeaderContentLengthHTTPHeader[] =
    "Inference-Header-Content-Length";
constexpr char kAcceptEncodingHTTPHeader[] = "Accept-Encoding";
constexpr char kContentEncodingHTTPHeader[] = "Content-Encoding";
constexpr char kContentTypeHeader[] = "Content-Type";
constexpr char kContentLengthHeader[] = "Content-Length";

constexpr char kTensorFlowGraphDefPlatform[] = "tensorflow_graphdef";
constexpr char kTensorFlowSavedModelPlatform[] = "tensorflow_savedmodel";
constexpr char kTensorFlowGraphDefFilename[] = "model.graphdef";
constexpr char kTensorFlowSavedModelFilename[] = "model.savedmodel";
constexpr char kTensorFlowBackend[] = "tensorflow";

constexpr char kTensorRTPlanPlatform[] = "tensorrt_plan";
constexpr char kTensorRTPlanFilename[] = "model.plan";
constexpr char kTensorRTBackend[] = "tensorrt";

constexpr char kOnnxRuntimeOnnxPlatform[] = "onnxruntime_onnx";
constexpr char kOnnxRuntimeOnnxFilename[] = "model.onnx";
constexpr char kOnnxRuntimeBackend[] = "onnxruntime";

constexpr char kOpenVINORuntimeOpenVINOFilename[] = "model.xml";
constexpr char kOpenVINORuntimeBackend[] = "openvino";

constexpr char kPyTorchLibTorchPlatform[] = "pytorch_libtorch";
constexpr char kPyTorchLibTorchFilename[] = "model.pt";
constexpr char kPyTorchBackend[] = "pytorch";

constexpr char kPythonFilename[] = "model.py";
constexpr char kPythonBackend[] = "python";

#ifdef TRITON_ENABLE_ENSEMBLE
constexpr char kEnsemblePlatform[] = "ensemble";
#endif  // TRITON_ENABLE_ENSEMBLE

constexpr char kTensorRTExecutionAccelerator[] = "tensorrt";
constexpr char kOpenVINOExecutionAccelerator[] = "openvino";
constexpr char kGPUIOExecutionAccelerator[] = "gpu_io";
constexpr char kAutoMixedPrecisionExecutionAccelerator[] =
    "auto_mixed_precision";

constexpr char kModelConfigPbTxt[] = "config.pbtxt";

constexpr char kMetricsLabelModelName[] = "model";
constexpr char kMetricsLabelModelVersion[] = "version";
constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";

constexpr char kWarmupDataFolder[] = "warmup";
constexpr char kInitialStateFolder[] = "initial_state";

constexpr uint64_t NANOS_PER_SECOND = 1000000000;
constexpr uint64_t NANOS_PER_MILLIS = 1000000;
constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX;
constexpr uint64_t SEQUENCE_IDLE_DEFAULT_MICROSECONDS = 1000 * 1000;
constexpr size_t STRING_CORRELATION_ID_MAX_LENGTH_BYTES = 128;
constexpr size_t CUDA_IPC_STRUCT_SIZE = 64;

#ifdef TRITON_ENABLE_METRICS
// MetricModelReporter expects a device ID for GPUs, but we reuse this device
// ID for other metrics as well such as for CPU and Response Cache metrics
constexpr int METRIC_REPORTER_ID_CPU = -1;
constexpr int METRIC_REPORTER_ID_RESPONSE_CACHE = -2;
#endif

#define TIMESPEC_TO_NANOS(TS) \
  ((TS).tv_sec * triton::core::NANOS_PER_SECOND + (TS).tv_nsec)
#define TIMESPEC_TO_MILLIS(TS) \
  (TIMESPEC_TO_NANOS(TS) / triton::core::NANOS_PER_MILLIS)

#define DISALLOW_MOVE(TypeName) TypeName(Context&& o) = delete;
#define DISALLOW_COPY(TypeName) TypeName(const TypeName&) = delete;
#define DISALLOW_ASSIGN(TypeName) void operator=(const TypeName&) = delete;
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  DISALLOW_COPY(TypeName)                  \
  DISALLOW_ASSIGN(TypeName)

}}  // namespace triton::core
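For reference, the DISALLOW_* and TIMESPEC_* macros above are what the other deleted sources use (for example DISALLOW_COPY_AND_ASSIGN(TritonModelInstance) in backend_model_instance.h). A minimal illustration of both, with a hypothetical Widget class introduced only for the example:

#include <ctime>
#include <cstdint>

class Widget {  // hypothetical class, for illustration only
 public:
  Widget() = default;

 private:
  DISALLOW_COPY_AND_ASSIGN(Widget);  // deletes the copy ctor and operator=
};

uint64_t
NowNanos()
{
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return TIMESPEC_TO_NANOS(ts);  // tv_sec * NANOS_PER_SECOND + tv_nsec
}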
3rdparty/core-r22.12/src/cuda_memory_manager.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "cuda_memory_manager.h"
#include <cnmem.h>
#include <string.h>
#include <set>
#include "cuda_utils.h"
#include "triton/common/logging.h"
namespace {

#define RETURN_IF_CNMEM_ERROR(S, MSG)                    \
  do {                                                   \
    auto status__ = (S);                                 \
    if (status__ != CNMEM_STATUS_SUCCESS) {              \
      return Status(                                     \
          Status::Code::INTERNAL,                        \
          (MSG) + ": " + cnmemGetErrorString(status__)); \
    }                                                    \
  } while (false)

std::string
PointerToString(void* ptr)
{
  std::stringstream ss;
  ss << ptr;
  return ss.str();
}

}  // namespace
namespace triton { namespace core {

std::unique_ptr<CudaMemoryManager> CudaMemoryManager::instance_;
std::mutex CudaMemoryManager::instance_mu_;

CudaMemoryManager::~CudaMemoryManager()
{
  if (has_allocation_) {
    auto status = cnmemFinalize();
    if (status != CNMEM_STATUS_SUCCESS) {
      LOG_ERROR << "Failed to finalize CUDA memory manager: [" << status
                << "] " << cnmemGetErrorString(status);
    }
  }
}

void
CudaMemoryManager::Reset()
{
  std::lock_guard<std::mutex> lock(instance_mu_);
  instance_.reset();
}

Status
CudaMemoryManager::Create(const CudaMemoryManager::Options& options)
{
  // Ensure thread-safe creation of the CUDA memory pool
  std::lock_guard<std::mutex> lock(instance_mu_);
  if (instance_ != nullptr) {
    LOG_WARNING << "New CUDA memory pools could not be created since they "
                   "already exist";
    return Status::Success;
  }

  std::set<int> supported_gpus;
  auto status = GetSupportedGPUs(
      &supported_gpus, options.min_supported_compute_capability_);
  if (status.IsOk()) {
    std::vector<cnmemDevice_t> devices;
    for (auto gpu : supported_gpus) {
      const auto it = options.memory_pool_byte_size_.find(gpu);
      if ((it != options.memory_pool_byte_size_.end()) && (it->second != 0)) {
        devices.emplace_back();
        auto& device = devices.back();
        memset(&device, 0, sizeof(device));
        device.device = gpu;
        device.size = it->second;
        LOG_INFO << "CUDA memory pool is created on device " << device.device
                 << " with size " << device.size;
      }
    }

    if (!devices.empty()) {
      RETURN_IF_CNMEM_ERROR(
          cnmemInit(devices.size(), devices.data(), CNMEM_FLAGS_CANNOT_GROW),
          std::string("Failed to initialize CUDA memory manager"));
    } else {
      LOG_INFO << "CUDA memory pool disabled";
    }

    // Keep an instance so that CNMeM is finalized properly when it goes
    // out of scope.
    instance_.reset(new CudaMemoryManager(!devices.empty()));
  } else {
    return Status(
        status.ErrorCode(),
        "Failed to initialize CUDA memory manager: " + status.Message());
  }

  return Status::Success;
}

Status
CudaMemoryManager::Alloc(void** ptr, uint64_t size, int64_t device_id)
{
  if (instance_ == nullptr) {
    return Status(
        Status::Code::UNAVAILABLE, "CudaMemoryManager has not been created");
  } else if (!instance_->has_allocation_) {
    return Status(
        Status::Code::UNAVAILABLE,
        "CudaMemoryManager has no preallocated CUDA memory");
  }

  int current_device;
  RETURN_IF_CUDA_ERR(
      cudaGetDevice(&current_device), std::string("Failed to get device"));
  bool overridden = (current_device != device_id);
  if (overridden) {
    RETURN_IF_CUDA_ERR(
        cudaSetDevice(device_id), std::string("Failed to set device"));
  }

  // Defer returning the error to make sure the device is recovered
  auto err = cnmemMalloc(ptr, size, nullptr);
  if (overridden) {
    cudaSetDevice(current_device);
  }

  RETURN_IF_CNMEM_ERROR(
      err, std::string("Failed to allocate CUDA memory with byte size ") +
               std::to_string(size) + " on GPU " + std::to_string(device_id));
  return Status::Success;
}

Status
CudaMemoryManager::Free(void* ptr, int64_t device_id)
{
  if (instance_ == nullptr) {
    return Status(
        Status::Code::UNAVAILABLE, "CudaMemoryManager has not been created");
  } else if (!instance_->has_allocation_) {
    return Status(
        Status::Code::UNAVAILABLE,
        "CudaMemoryManager has no preallocated CUDA memory");
  }

  int current_device;
  RETURN_IF_CUDA_ERR(
      cudaGetDevice(&current_device), std::string("Failed to get device"));
  bool overridden = (current_device != device_id);
  if (overridden) {
    RETURN_IF_CUDA_ERR(
        cudaSetDevice(device_id), std::string("Failed to set device"));
  }

  // Defer returning the error to make sure the device is recovered
  auto err = cnmemFree(ptr, nullptr);
  if (overridden) {
    cudaSetDevice(current_device);
  }

  RETURN_IF_CNMEM_ERROR(
      err, std::string("Failed to deallocate CUDA memory at address ") +
               PointerToString(ptr) + " on GPU " + std::to_string(device_id));
  return Status::Success;
}

}}  // namespace triton::core
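Alloc() and Free() above each hand-roll the same save/switch/restore device sequence around the cnmem call. A minimal RAII sketch of that pattern, for illustration only (ScopedSetDevice is a hypothetical helper, not part of this file or of Triton):

// Hypothetical illustration: an RAII guard capturing the save/switch/restore
// device pattern used by Alloc() and Free() above.
#include <cuda_runtime_api.h>

class ScopedSetDevice {
 public:
  explicit ScopedSetDevice(int device_id)
  {
    cudaGetDevice(&previous_);
    overridden_ = (previous_ != device_id);
    if (overridden_) {
      cudaSetDevice(device_id);
    }
  }
  ~ScopedSetDevice()
  {
    // Restore the caller's device even on early return.
    if (overridden_) {
      cudaSetDevice(previous_);
    }
  }

 private:
  int previous_ = 0;
  bool overridden_ = false;
};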
3rdparty/core-r22.12/src/cuda_memory_manager.h
deleted
100644 → 0
View file @
d592fbea
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#include <map>
#include <memory>
#include <mutex>
#include "status.h"
namespace triton { namespace core {

// This is a singleton class responsible for maintaining the CUDA memory pool
// used by the inference server. CUDA memory allocations and deallocations
// must be requested via functions provided by this class.
class CudaMemoryManager {
 public:
  // Options to configure the CUDA memory manager.
  struct Options {
    Options(double cc = 6.0, const std::map<int, uint64_t>& s = {})
        : min_supported_compute_capability_(cc), memory_pool_byte_size_(s)
    {
    }

    // The minimum compute capability of the supported devices.
    double min_supported_compute_capability_;

    // The size of CUDA memory reserved for the specified devices.
    // The memory size will be rounded up to align with
    // the default granularity (512 bytes).
    // No memory will be reserved for devices that are not listed.
    std::map<int, uint64_t> memory_pool_byte_size_;
  };

  ~CudaMemoryManager();

  // Create the memory manager based on the 'options' specified.
  // Return Status object indicating success or failure.
  static Status Create(const Options& options);

  // Allocate CUDA memory on GPU 'device_id' with the requested 'size'
  // and return the pointer in 'ptr'.
  // Return Status object indicating success or failure.
  static Status Alloc(void** ptr, uint64_t size, int64_t device_id);

  // Free the memory allocated by the memory manager on 'device_id'.
  // Return Status object indicating success or failure.
  static Status Free(void* ptr, int64_t device_id);

 protected:
  // Provide explicit control on the lifecycle of the CUDA memory manager,
  // for testing only.
  static void Reset();

 private:
  CudaMemoryManager(bool has_allocation) : has_allocation_(has_allocation) {}

  bool has_allocation_;
  static std::unique_ptr<CudaMemoryManager> instance_;
  static std::mutex instance_mu_;
};

}}  // namespace triton::core
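A minimal usage sketch of the singleton API declared above, assuming a single GPU 0 and an arbitrary 256 MiB pool size (both values are illustrative, and error handling is abbreviated):

// Illustrative only: reserve a 256 MiB pool on GPU 0, then allocate and free
// one buffer through the singleton declared above.
CudaMemoryManager::Options options(
    6.0 /* min compute capability */,
    {{0 /* GPU id */, 256 * 1024 * 1024 /* bytes */}});
Status status = CudaMemoryManager::Create(options);

void* buffer = nullptr;
if (status.IsOk()) {
  status = CudaMemoryManager::Alloc(&buffer, 4096 /* bytes */, 0 /* GPU id */);
}
if (status.IsOk()) {
  status = CudaMemoryManager::Free(buffer, 0 /* GPU id */);
}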
3rdparty/core-r22.12/src/cuda_utils.cc
deleted
100644 → 0
View file @
d592fbea
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cuda_utils.h"
#include "model_config_utils.h"
#include "triton/common/nvtx.h"
namespace triton { namespace core {

#ifdef TRITON_ENABLE_GPU
void CUDART_CB
MemcpyHost(void* args)
{
  auto* copy_params = reinterpret_cast<CopyParams*>(args);
  memcpy(copy_params->dst_, copy_params->src_, copy_params->byte_size_);
  delete copy_params;
}
#endif  // TRITON_ENABLE_GPU

Status
GetDeviceMemoryInfo(const int device_id, size_t* free, size_t* total)
{
  *free = 0;
  *total = 0;
#ifdef TRITON_ENABLE_GPU
  // Make sure that the correct device is set before creating the stream and
  // then restore the device to what was set by the caller.
  int current_device;
  auto cuerr = cudaGetDevice(&current_device);
  bool overridden = false;
  if (cuerr == cudaSuccess) {
    overridden = (current_device != device_id);
    if (overridden) {
      cuerr = cudaSetDevice(device_id);
    }
  }

  if (cuerr == cudaSuccess) {
    cuerr = cudaMemGetInfo(free, total);
  }

  if (overridden) {
    cudaSetDevice(current_device);
  }

  if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL,
        (std::string("unable to get memory info for device ") +
         std::to_string(device_id) + ": " + cudaGetErrorString(cuerr)));
  }
#endif  // TRITON_ENABLE_GPU
  return Status::Success;
}

Status
EnablePeerAccess(const double min_compute_capability)
{
#ifdef TRITON_ENABLE_GPU
  // If we can't enable peer access for one device pair, the best we can
  // do is skip it...
  std::set<int> supported_gpus;
  bool all_enabled = false;
  if (GetSupportedGPUs(&supported_gpus, min_compute_capability).IsOk()) {
    all_enabled = true;
    int can_access_peer = false;
    for (const auto& host : supported_gpus) {
      auto cuerr = cudaSetDevice(host);
      if (cuerr == cudaSuccess) {
        for (const auto& peer : supported_gpus) {
          if (host == peer) {
            continue;
          }
          cuerr = cudaDeviceCanAccessPeer(&can_access_peer, host, peer);
          if ((cuerr == cudaSuccess) && (can_access_peer == 1)) {
            cuerr = cudaDeviceEnablePeerAccess(peer, 0);
          }
          all_enabled &= ((cuerr == cudaSuccess) && (can_access_peer == 1));
        }
      }
    }
  }
  if (!all_enabled) {
    return Status(
        Status::Code::UNSUPPORTED,
        "failed to enable peer access for some device pairs");
  }
#endif  // TRITON_ENABLE_GPU
  return Status::Success;
}

Status
CopyBuffer(
    const std::string& msg, const TRITONSERVER_MemoryType src_memory_type,
    const int64_t src_memory_type_id,
    const TRITONSERVER_MemoryType dst_memory_type,
    const int64_t dst_memory_type_id, const size_t byte_size, const void* src,
    void* dst, cudaStream_t cuda_stream, bool* cuda_used, bool copy_on_stream)
{
  NVTX_RANGE(nvtx_, "CopyBuffer");

  *cuda_used = false;

  // For CUDA memcpy, all host-to-host copies are blocking with respect to
  // the host, so use memcpy() directly. In this case, we need to be careful
  // about whether the src buffer is valid.
  if ((src_memory_type != TRITONSERVER_MEMORY_GPU) &&
      (dst_memory_type != TRITONSERVER_MEMORY_GPU)) {
#ifdef TRITON_ENABLE_GPU
    if (copy_on_stream) {
      auto params = new CopyParams(dst, src, byte_size);
      cudaLaunchHostFunc(
          cuda_stream, MemcpyHost, reinterpret_cast<void*>(params));
      *cuda_used = true;
    } else {
      memcpy(dst, src, byte_size);
    }
#else
    memcpy(dst, src, byte_size);
#endif  // TRITON_ENABLE_GPU
  } else {
#ifdef TRITON_ENABLE_GPU
    RETURN_IF_CUDA_ERR(
        cudaMemcpyAsync(dst, src, byte_size, cudaMemcpyDefault, cuda_stream),
        msg + ": failed to perform CUDA copy");
    *cuda_used = true;
#else
    return Status(
        Status::Code::INTERNAL,
        msg + ": try to use CUDA copy while GPU is not supported");
#endif  // TRITON_ENABLE_GPU
  }

  return Status::Success;
}

void
CopyBufferHandler(
    const std::string& msg, const TRITONSERVER_MemoryType src_memory_type,
    const int64_t src_memory_type_id,
    const TRITONSERVER_MemoryType dst_memory_type,
    const int64_t dst_memory_type_id, const size_t byte_size, const void* src,
    void* dst, cudaStream_t cuda_stream, void* response_ptr,
    triton::common::SyncQueue<std::tuple<Status, bool, void*>>*
        completion_queue)
{
  bool cuda_used = false;
  Status status = CopyBuffer(
      msg, src_memory_type, src_memory_type_id, dst_memory_type,
      dst_memory_type_id, byte_size, src, dst, cuda_stream, &cuda_used);
  completion_queue->Put(std::make_tuple(status, cuda_used, response_ptr));
}

#ifdef TRITON_ENABLE_GPU
Status
CheckGPUCompatibility(const int gpu_id, const double min_compute_capability)
{
  // Query the compute capability from the device
  cudaDeviceProp cuprops;
  cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, gpu_id);
  if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL,
        "unable to get CUDA device properties for GPU ID " +
            std::to_string(gpu_id) + ": " + cudaGetErrorString(cuerr));
  }

  double compute_capability = cuprops.major + (cuprops.minor / 10.0);
  if ((compute_capability > min_compute_capability) ||
      (abs(compute_capability - min_compute_capability) < 0.01)) {
    return Status::Success;
  } else {
    return Status(
        Status::Code::UNSUPPORTED,
        "gpu " + std::to_string(gpu_id) + " has compute capability '" +
            std::to_string(cuprops.major) + "." +
            std::to_string(cuprops.minor) +
            "' which is less than the minimum supported of '" +
            std::to_string(min_compute_capability) + "'");
  }
}

Status
GetSupportedGPUs(
    std::set<int>* supported_gpus, const double min_compute_capability)
{
  // Make sure the set is empty before starting
  supported_gpus->clear();

  int device_cnt;
  cudaError_t cuerr = cudaGetDeviceCount(&device_cnt);
  if ((cuerr == cudaErrorNoDevice) || (cuerr == cudaErrorInsufficientDriver)) {
    device_cnt = 0;
  } else if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL, "unable to get number of CUDA devices: " +
                                    std::string(cudaGetErrorString(cuerr)));
  }

  // Populate supported_gpus
  for (int gpu_id = 0; gpu_id < device_cnt; gpu_id++) {
    Status status = CheckGPUCompatibility(gpu_id, min_compute_capability);
    if (status.IsOk()) {
      supported_gpus->insert(gpu_id);
    }
  }
  return Status::Success;
}

Status
SupportsIntegratedZeroCopy(const int gpu_id, bool* zero_copy_support)
{
  // Query the device to check if it is integrated
  cudaDeviceProp cuprops;
  cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, gpu_id);
  if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL,
        "unable to get CUDA device properties for GPU ID " +
            std::to_string(gpu_id) + ": " + cudaGetErrorString(cuerr));
  }

  // Zero-copy is supported only on an integrated GPU that can map host memory
  if (cuprops.integrated && cuprops.canMapHostMemory) {
    *zero_copy_support = true;
  } else {
    *zero_copy_support = false;
  }

  return Status::Success;
}
#endif  // TRITON_ENABLE_GPU

}}  // namespace triton::core
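A brief usage sketch of CopyBuffer() from a caller's perspective; src_buffer, dst_buffer, byte_size, and stream are assumed to exist and are illustrative only, and error handling is abbreviated:

// Illustrative only: copy byte_size bytes from GPU 0 into pinned host memory
// via CopyBuffer(), then synchronize if an asynchronous CUDA copy was issued.
bool cuda_used = false;
Status status = CopyBuffer(
    "example tensor", TRITONSERVER_MEMORY_GPU, 0 /* src id */,
    TRITONSERVER_MEMORY_CPU_PINNED, 0 /* dst id */, byte_size, src_buffer,
    dst_buffer, stream, &cuda_used);
if (status.IsOk() && cuda_used) {
  // cudaMemcpyAsync was enqueued on 'stream', so wait for it to finish.
  cudaStreamSynchronize(stream);
}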