ModelZoo / Qwen_lmdeploy · Commits

Commit fcefbf3d, authored Nov 30, 2023 by xiabo
Reorganize the project
parent d592fbea
Changes: 170 files
Showing 20 changed files with 0 additions and 8704 deletions.
3rdparty/core-r22.12/cmake/TritonCoreConfig.cmake.in          +0  -37
3rdparty/core-r22.12/include/triton/core/tritonbackend.h      +0  -1410
3rdparty/core-r22.12/include/triton/core/tritonrepoagent.h    +0  -417
3rdparty/core-r22.12/include/triton/core/tritonserver.h       +0  -2360
3rdparty/core-r22.12/src/backend_config.cc                    +0  -225
3rdparty/core-r22.12/src/backend_config.h                     +0  -77
3rdparty/core-r22.12/src/backend_manager.cc                   +0  -383
3rdparty/core-r22.12/src/backend_manager.h                    +0  -174
3rdparty/core-r22.12/src/backend_memory_manager.cc            +0  -149
3rdparty/core-r22.12/src/backend_memory_manager.h             +0  -36
3rdparty/core-r22.12/src/backend_model.cc                     +0  -1301
3rdparty/core-r22.12/src/backend_model.h                      +0  -133
3rdparty/core-r22.12/src/backend_model_instance.cc            +0  -966
3rdparty/core-r22.12/src/backend_model_instance.h             +0  -200
3rdparty/core-r22.12/src/buffer_attributes.cc                 +0  -104
3rdparty/core-r22.12/src/buffer_attributes.h                  +0  -79
3rdparty/core-r22.12/src/constants.h                          +0  -108
3rdparty/core-r22.12/src/cuda_memory_manager.cc               +0  -197
3rdparty/core-r22.12/src/cuda_memory_manager.h                +0  -85
3rdparty/core-r22.12/src/cuda_utils.cc                        +0  -263
Too many changes to show. To preserve performance only 170 of 170+ files are displayed.
3rdparty/core-r22.12/cmake/TritonCoreConfig.cmake.in  deleted 100644 → 0
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONCORE_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONCORE_CMAKE_DIR})
if(NOT TARGET TritonCore::triton-core-serverapi)
include("${TRITONCORE_CMAKE_DIR}/TritonCoreTargets.cmake")
endif()
3rdparty/core-r22.12/include/triton/core/tritonbackend.h  deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include "triton/core/tritonserver.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _COMPILING_TRITONBACKEND
#if defined(_MSC_VER)
#define TRITONBACKEND_DECLSPEC __declspec(dllexport)
#define TRITONBACKEND_ISPEC __declspec(dllimport)
#elif defined(__GNUC__)
#define TRITONBACKEND_DECLSPEC __attribute__((__visibility__("default")))
#define TRITONBACKEND_ISPEC
#else
#define TRITONBACKEND_DECLSPEC
#define TRITONBACKEND_ISPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONBACKEND_DECLSPEC __declspec(dllimport)
#define TRITONBACKEND_ISPEC __declspec(dllexport)
#else
#define TRITONBACKEND_DECLSPEC
#define TRITONBACKEND_ISPEC
#endif
#endif
struct TRITONBACKEND_MemoryManager;
struct TRITONBACKEND_Input;
struct TRITONBACKEND_Output;
struct TRITONBACKEND_State;
struct TRITONBACKEND_Request;
struct TRITONBACKEND_ResponseFactory;
struct TRITONBACKEND_Response;
struct TRITONBACKEND_Backend;
struct TRITONBACKEND_Model;
struct TRITONBACKEND_ModelInstance;
struct TRITONBACKEND_BackendAttribute;
///
/// TRITONBACKEND API Version
///
/// The TRITONBACKEND API is versioned with major and minor version
/// numbers. Any change to the API that does not impact backwards
/// compatibility (for example, adding a non-required function)
/// increases the minor version number. Any change that breaks
/// backwards compatibility (for example, deleting or changing the
/// behavior of a function) increases the major version number. A
/// backend should check that the API version used to compile the
/// backend is compatible with the API version of the Triton server
/// that it is running in. This is typically done by code similar to
/// the following which makes sure that the major versions are equal
/// and that the minor version of Triton is >= the minor version used
/// to build the backend.
///
/// uint32_t api_version_major, api_version_minor;
/// TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor);
/// if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
/// (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
/// return TRITONSERVER_ErrorNew(
/// TRITONSERVER_ERROR_UNSUPPORTED,
/// "triton backend API version does not support this backend");
/// }
///
#define TRITONBACKEND_API_VERSION_MAJOR 1
#define TRITONBACKEND_API_VERSION_MINOR 10
/// Get the TRITONBACKEND API version supported by Triton. This value
/// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and
/// TRITONBACKEND_API_VERSION_MINOR used to build the backend to
/// ensure that Triton is compatible with the backend.
///
/// \param major Returns the TRITONBACKEND API major version supported
/// by Triton.
/// \param minor Returns the TRITONBACKEND API minor version supported
/// by Triton.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ApiVersion(
    uint32_t* major, uint32_t* minor);
/// TRITONBACKEND_ArtifactType
///
/// The ways that the files that make up a backend or model are
/// communicated to the backend.
///
/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The model or backend
/// artifacts are made available to Triton via a locally
/// accessible filesystem. The backend can access these files
/// using an appropriate system API.
///
typedef enum TRITONBACKEND_artifacttype_enum {
  TRITONBACKEND_ARTIFACT_FILESYSTEM
} TRITONBACKEND_ArtifactType;
///
/// TRITONBACKEND_MemoryManager
///
/// Object representing a memory manager that is capable of
/// allocating and otherwise managing different memory types. For
/// improved performance Triton maintains pools for GPU and CPU-pinned
/// memory and the memory manager allows backends to access those
/// pools.
///
/// Allocate a contiguous block of memory of a specific type using a
/// memory manager. Two error codes have specific interpretations for
/// this function:
///
/// TRITONSERVER_ERROR_UNSUPPORTED: Indicates that Triton is
/// incapable of allocating the requested memory type and memory
/// type ID. Requests for the memory type and ID will always fail
/// no matter 'byte_size' of the request.
///
/// TRITONSERVER_ERROR_UNAVAILABLE: Indicates that Triton can
/// allocate the memory type and ID but that currently it cannot
/// allocate a contiguous block of memory of the requested
/// 'byte_size'.
///
/// \param manager The memory manager.
/// \param buffer Returns the allocated memory.
/// \param memory_type The type of memory to allocate.
/// \param memory_type_id The ID associated with the memory type to
/// allocate. For GPU memory this indicates the device ID of the GPU
/// to allocate from.
/// \param byte_size The size of memory to allocate, in bytes.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_MemoryManagerAllocate(
    TRITONBACKEND_MemoryManager* manager, void** buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id,
    const uint64_t byte_size);
/// Free a buffer that was previously allocated with
/// TRITONBACKEND_MemoryManagerAllocate. The call must provide the
/// same values for 'memory_type' and 'memory_type_id' as were used
/// when the buffer was allocated or else the behavior is undefined.
///
/// \param manager The memory manager.
/// \param buffer The allocated memory buffer to free.
/// \param memory_type The type of memory of the buffer.
/// \param memory_type_id The ID associated with the memory type of
/// the buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_MemoryManagerFree(
    TRITONBACKEND_MemoryManager* manager, void* buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id);
///
/// TRITONBACKEND_Input
///
/// Object representing an input tensor.
///
/// Get the name and properties of an input tensor. The returned
/// strings and other properties are owned by the input, not the
/// caller, and so should not be modified or freed.
///
/// \param input The input tensor.
/// \param name If non-nullptr, returns the tensor name.
/// \param datatype If non-nullptr, returns the tensor datatype.
/// \param shape If non-nullptr, returns the tensor shape.
/// \param dim_count If non-nullptr, returns the number of dimensions
/// in the tensor shape.
/// \param byte_size If non-nullptr, returns the size of the available
/// data for the tensor, in bytes. This size reflects the actual data
/// available, and does not necessarily match what is
/// expected/required for the tensor given its shape and datatype. It
/// is the responsibility of the backend to handle mismatches in these
/// sizes appropriately.
/// \param buffer_count If non-nullptr, returns the number of buffers
/// holding the contents of the tensor. These buffers are accessed
/// using TRITONBACKEND_InputBuffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputProperties(
    TRITONBACKEND_Input* input, const char** name,
    TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count);
/// Get the name and properties of an input tensor associated with a given
/// host policy. If there are no input buffers for the specified host policy,
/// the properties of the fallback input buffers are returned. The returned
/// strings and other properties are owned by the input, not the caller, and so
/// should not be modified or freed.
///
/// \param input The input tensor.
/// \param host_policy_name The host policy name. Fallback input properties
/// will be returned if nullptr is provided.
/// \param name If non-nullptr, returns the tensor name.
/// \param datatype If non-nullptr, returns the tensor datatype.
/// \param shape If non-nullptr, returns the tensor shape.
/// \param dim_count If non-nullptr, returns the number of dimensions
/// in the tensor shape.
/// \param byte_size If non-nullptr, returns the size of the available
/// data for the tensor, in bytes. This size reflects the actual data
/// available, and does not necessarily match what is
/// expected/required for the tensor given its shape and datatype. It
/// is the responsibility of the backend to handle mismatches in these
/// sizes appropriately.
/// \param buffer_count If non-nullptr, returns the number of buffers
/// holding the contents of the tensor. These buffers are accessed
/// using TRITONBACKEND_InputBufferForHostPolicy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputPropertiesForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count);
/// Get a buffer holding (part of) the tensor data for an input. For a
/// given input the number of buffers composing the input are found
/// from 'buffer_count' returned by TRITONBACKEND_InputProperties. The
/// returned buffer is owned by the input and so should not be
/// modified or freed by the caller. The lifetime of the buffer
/// matches that of the input and so the buffer should not be accessed
/// after the input tensor object is released.
///
/// \param input The input tensor.
/// \param index The index of the buffer. Must be 0 <= index <
/// buffer_count, where buffer_count is the value returned by
/// TRITONBACKEND_InputProperties.
/// \param buffer Returns a pointer to a contiguous block of data for
/// the named input.
/// \param buffer_byte_size Returns the size, in bytes, of 'buffer'.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the function caller. Returns
/// the actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the function caller.
/// Returns the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputBuffer(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    uint64_t* buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id);
/// Get a buffer holding (part of) the tensor data for an input for a specific
/// host policy. If there are no input buffers specified for this host policy,
/// the fallback input buffer is returned.
/// For a given input the number of buffers composing the input are found
/// from 'buffer_count' returned by TRITONBACKEND_InputPropertiesForHostPolicy.
/// The returned buffer is owned by the input and so should not be modified or
/// freed by the caller. The lifetime of the buffer matches that of the input
/// and so the buffer should not be accessed after the input tensor object is
/// released.
///
/// \param input The input tensor.
/// \param host_policy_name The host policy name. Fallback input buffer
/// will be returned if nullptr is provided.
/// \param index The index of the buffer. Must be 0 <= index <
/// buffer_count, where buffer_count is the value returned by
/// TRITONBACKEND_InputPropertiesForHostPolicy.
/// \param buffer Returns a pointer to a contiguous block of data for
/// the named input.
/// \param buffer_byte_size Returns the size, in bytes, of 'buffer'.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the function caller. Returns
/// the actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the function caller.
/// Returns the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBufferForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const uint32_t index, const void** buffer, uint64_t* buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Get the buffer attributes associated with the given input buffer. For a
/// given input the number of buffers composing the input are found from
/// 'buffer_count' returned by TRITONBACKEND_InputProperties. The returned
/// 'buffer_attributes' is owned by the input and so should not be modified or
/// freed by the caller. The lifetime of the 'buffer_attributes' matches that of
/// the input and so the 'buffer_attributes' should not be accessed after the
/// input tensor object is released.
///
/// \param input The input tensor.
/// \param index The index of the buffer. Must be 0 <= index < buffer_count,
/// where buffer_count is the value returned by TRITONBACKEND_InputProperties.
/// \param buffer Returns a pointer to a contiguous block of data for
/// the named input.
/// \param buffer_attributes Returns the attributes for the given buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputBufferAttributes(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    TRITONSERVER_BufferAttributes** buffer_attributes);
///
/// TRITONBACKEND_Output
///
/// Object representing a response output tensor.
///
/// Get a buffer to use to hold the tensor data for the output. The
/// returned buffer is owned by the output and so should not be freed
/// by the caller. The caller can and should fill the buffer with the
/// output data for the tensor. The lifetime of the buffer matches
/// that of the output and so the buffer should not be accessed after
/// the output tensor object is released.
///
/// \param buffer Returns a pointer to a buffer where the contents of
/// the output tensor should be placed.
/// \param buffer_byte_size The size, in bytes, of the buffer required
/// by the caller.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the caller. Returns the
/// actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the caller. Returns
/// the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_OutputBuffer(
    TRITONBACKEND_Output* output, void** buffer,
    const uint64_t buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id);
/// Get the buffer attributes associated with the given output buffer. The
/// returned 'buffer_attributes' is owned by the output and so should not be
/// modified or freed by the caller. The lifetime of the 'buffer_attributes'
/// matches that of the output and so the 'buffer_attributes' should not be
/// accessed after the output tensor object is released. This function must be
/// called after the TRITONBACKEND_OutputBuffer otherwise it might contain
/// incorrect data.
///
/// \param output The output tensor.
/// \param buffer_attributes Returns the attributes for the output buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_OutputBufferAttributes(
    TRITONBACKEND_Output* output,
    TRITONSERVER_BufferAttributes** buffer_attributes);
///
/// TRITONBACKEND_Request
///
/// Object representing an inference request.
///
/// Get the ID of the request. Can be nullptr if request doesn't have
/// an ID. The returned string is owned by the request, not the
/// caller, and so should not be modified or freed.
///
/// \param request The inference request.
/// \param id Returns the ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestId(
    TRITONBACKEND_Request* request, const char** id);
/// Get the correlation ID of the request if it is an unsigned integer.
/// Zero indicates that the request does not have a correlation ID.
/// Returns failure if correlation ID for given request is not an unsigned
/// integer.
///
/// \param request The inference request.
/// \param id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestCorrelationId(
    TRITONBACKEND_Request* request, uint64_t* id);
/// Get the correlation ID of the request if it is a string.
/// Empty string indicates that the request does not have a correlation ID.
/// Returns error if correlation ID for given request is not a string.
///
/// \param request The inference request.
/// \param id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestCorrelationIdString(
    TRITONBACKEND_Request* request, const char** id);
/// Get the flag(s) associated with a request. On return 'flags' holds
/// a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for
/// available flags.
///
/// \param request The inference request.
/// \param flags Returns the flags.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestFlags(
    TRITONBACKEND_Request* request, uint32_t* flags);
/// Get the number of input tensors specified in the request.
///
/// \param request The inference request.
/// \param count Returns the number of input tensors.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputCount(
    TRITONBACKEND_Request* request, uint32_t* count);
/// Get the name of an input tensor. The caller does not own
/// the returned string and must not modify or delete it. The lifetime
/// of the returned string extends only as long as 'request'.
///
/// \param request The inference request.
/// \param index The index of the input tensor. Must be 0 <= index <
/// count, where count is the value returned by
/// TRITONBACKEND_RequestInputCount.
/// \param input_name Returns the name of the input tensor
/// corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** input_name);
/// Get a named request input. The lifetime of the returned input
/// object matches that of the request and so the input object should
/// not be accessed after the request object is released.
///
/// \param request The inference request.
/// \param name The name of the input.
/// \param input Returns the input corresponding to the name.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInput(
    TRITONBACKEND_Request* request, const char* name,
    TRITONBACKEND_Input** input);
/// Get a request input by index. The order of inputs in a given
/// request is not necessarily consistent with other requests, even if
/// the requests are in the same batch. As a result, you can not
/// assume that an index obtained from one request will point to the
/// same input in a different request.
///
/// The lifetime of the returned input object matches that of the
/// request and so the input object should not be accessed after the
/// request object is released.
///
/// \param request The inference request.
/// \param index The index of the input tensor. Must be 0 <= index <
/// count, where count is the value returned by
/// TRITONBACKEND_RequestInputCount.
/// \param input Returns the input corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputByIndex(
    TRITONBACKEND_Request* request, const uint32_t index,
    TRITONBACKEND_Input** input);
/// Get the number of output tensors requested to be returned in the
/// request.
///
/// \param request The inference request.
/// \param count Returns the number of output tensors.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestOutputCount(
    TRITONBACKEND_Request* request, uint32_t* count);
/// Get the name of a requested output tensor. The caller does not own
/// the returned string and must not modify or delete it. The lifetime
/// of the returned string extends only as long as 'request'.
///
/// \param request The inference request.
/// \param index The index of the requested output tensor. Must be 0
/// <= index < count, where count is the value returned by
/// TRITONBACKEND_RequestOutputCount.
/// \param output_name Returns the name of the requested output tensor
/// corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestOutputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** output_name);
/// Returns the preferred memory type and memory type ID of the output buffer
/// for the request. As much as possible, Triton will attempt to return
/// the same memory_type and memory_type_id values that will be returned by
/// the subsequent call to TRITONBACKEND_OutputBuffer, however, the backend must
/// be capable of handling cases where the values differ.
///
/// \param request The request.
/// \param name The name of the output tensor. This is optional
/// and it should be set to nullptr to indicate that the tensor name has
/// not been determined.
/// \param byte_size The expected size of the buffer. This is optional
/// and it should be set to nullptr to indicate that the byte size has
/// not been determined.
/// \param memory_type Acts as both input and output. On input gives
/// the memory type preferred by the caller. Returns memory type preferred
/// by Triton, taken account of the caller preferred type.
/// \param memory_type_id Acts as both input and output. On input gives
/// the memory type ID preferred by the caller. Returns memory type ID preferred
/// by Triton, taken account of the caller preferred type ID.
/// \return a TRITONSERVER_Error object if a failure occurs.
/// A TRITONSERVER_ERROR_UNAVAILABLE error indicates that the properties are not
/// available, other error codes indicate an error.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputBufferProperties(
    TRITONBACKEND_Request* request, const char* name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Release the request. The request should be released when it is no
/// longer needed by the backend. If this call returns with an error
/// (i.e. non-nullptr) then the request was not released and ownership
/// remains with the backend. If this call returns with success, the
/// 'request' object is no longer owned by the backend and must not be
/// used. Any tensor names, data types, shapes, input tensors,
/// etc. returned by TRITONBACKEND_Request* functions for this request
/// are no longer valid. If a persistent copy of that data is required
/// it must be created before calling this function.
///
/// \param request The inference request.
/// \param release_flags Flags indicating what type of request release
/// should be performed. \see TRITONSERVER_RequestReleaseFlag. \see
/// TRITONSERVER_InferenceRequestReleaseFn_t.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestRelease(
    TRITONBACKEND_Request* request, uint32_t release_flags);
///
/// TRITONBACKEND_ResponseFactory
///
/// Object representing an inference response factory. Using a
/// response factory is not required; instead a response can be
/// generated directly from a TRITONBACKEND_Request object using
/// TRITONBACKEND_ResponseNew(). A response factory allows a request
/// to be released before all responses have been sent. Releasing a
/// request as early as possible releases all input tensor data and
/// therefore may be desirable in some cases.
/// Create the response factory associated with a request.
///
/// \param factory Returns the new response factory.
/// \param request The inference request.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseFactoryNew(
    TRITONBACKEND_ResponseFactory** factory, TRITONBACKEND_Request* request);
/// Destroy a response factory.
///
/// \param factory The response factory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseFactoryDelete(
    TRITONBACKEND_ResponseFactory* factory);
/// Send response flags without a corresponding response.
///
/// \param factory The response factory.
/// \param send_flags Flags to send. \see
/// TRITONSERVER_ResponseCompleteFlag. \see
/// TRITONSERVER_InferenceResponseCompleteFn_t.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactorySendFlags(
    TRITONBACKEND_ResponseFactory* factory, const uint32_t send_flags);
///
/// TRITONBACKEND_Response
///
/// Object representing an inference response. For a given request,
/// the backend must carefully manage the lifecycle of responses
/// generated for that request to ensure that the output tensor
/// buffers are allocated correctly. When a response is created with
/// TRITONBACKEND_ResponseNew or TRITONBACKEND_ResponseNewFromFactory,
/// all the outputs and corresponding buffers must be created for that
/// response using TRITONBACKEND_ResponseOutput and
/// TRITONBACKEND_OutputBuffer *before* another response is created
/// for the request. For a given response, outputs can be created in
/// any order but they must be created sequentially/synchronously (for
/// example, the backend cannot use multiple threads to simultaneously
/// add multiple outputs to a response).
///
/// The above requirement applies only to responses being generated
/// for a given request. The backend may generate responses in
/// parallel on multiple threads as long as those responses are for
/// different requests.
///
/// This order of response creation must be strictly followed. But,
/// once response(s) are created they do not need to be sent
/// immediately, nor do they need to be sent in the order they were
/// created. The backend may even delete a created response instead of
/// sending it by using TRITONBACKEND_ResponseDelete.
/// Create a response for a request.
///
/// \param response Returns the new response.
/// \param request The request.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseNew(
    TRITONBACKEND_Response** response, TRITONBACKEND_Request* request);
/// Create a response using a factory.
///
/// \param response Returns the new response.
/// \param factory The response factory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseNewFromFactory(
    TRITONBACKEND_Response** response, TRITONBACKEND_ResponseFactory* factory);
/// Destroy a response. It is not necessary to delete a response if
/// TRITONBACKEND_ResponseSend is called as that function transfers
/// ownership of the response object to Triton.
///
/// \param response The response.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseDelete(
    TRITONBACKEND_Response* response);
/// Set a string parameter in the response.
///
/// \param response The response.
/// \param name The name of the parameter.
/// \param value The value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetStringParameter(
    TRITONBACKEND_Response* response, const char* name, const char* value);
/// Set an integer parameter in the response.
///
/// \param response The response.
/// \param name The name of the parameter.
/// \param value The value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetIntParameter(
    TRITONBACKEND_Response* response, const char* name, const int64_t value);
/// Set a boolean parameter in the response.
///
/// \param response The response.
/// \param name The name of the parameter.
/// \param value The value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetBoolParameter(
    TRITONBACKEND_Response* response, const char* name, const bool value);
/// Create an output tensor in the response. The lifetime of the
/// returned output tensor object matches that of the response and so
/// the output tensor object should not be accessed after the response
/// object is deleted.
///
/// \param response The response.
/// \param output Returns the new response output.
/// \param name The name of the output tensor.
/// \param datatype The datatype of the output tensor.
/// \param shape The shape of the output tensor.
/// \param dims_count The number of dimensions in the output tensor
/// shape.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseOutput(
    TRITONBACKEND_Response* response, TRITONBACKEND_Output** output,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count);
/// Send a response. Calling this function transfers ownership of the
/// response object to Triton. The caller must not access or delete
/// the response object after calling this function.
///
/// \param response The response.
/// \param send_flags Flags associated with the response. \see
/// TRITONSERVER_ResponseCompleteFlag. \see
/// TRITONSERVER_InferenceResponseCompleteFn_t.
/// \param error The TRITONSERVER_Error to send if the response is an
/// error, or nullptr if the response is successful.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseSend(
    TRITONBACKEND_Response* response, const uint32_t send_flags,
    TRITONSERVER_Error* error);
///
/// TRITONBACKEND_State
///
/// Object representing a state.
///
/// Create a state in the request. The returned state object is only valid
/// before the TRITONBACKEND_StateUpdate is called. The state should not be
/// freed by the caller. If TRITONBACKEND_StateUpdate is not called, the
/// lifetime of the state matches the lifetime of the request. If the state name
/// does not exist in the "state" section of the model configuration, the state
/// will not be created and an error will be returned. If this function is
/// called when sequence batching is not enabled or there is no 'states' section
/// in the sequence batching section of the model configuration, this call will
/// return an error.
///
/// \param state Returns the new state.
/// \param request The request.
/// \param name The name of the state.
/// \param datatype The datatype of the state.
/// \param shape The shape of the state.
/// \param dims_count The number of dimensions in the state shape.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateNew(
    TRITONBACKEND_State** state, TRITONBACKEND_Request* request,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count);
/// Update the state for the sequence. Calling this function will replace the
/// state stored for this sequence in Triton with 'state' provided in the
/// function argument. If this function is called when sequence batching is not
/// enabled or there is no 'states' section in the sequence batching section of
/// the model configuration, this call will return an error. The backend is not
/// required to call this function. If the backend doesn't call
/// TRITONBACKEND_StateUpdate function, this particular state for the sequence
/// will not be updated and the next inference request in the sequence will use
/// the same state as the current inference request.
///
/// \param state The state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateUpdate(
    TRITONBACKEND_State* state);
/// Get a buffer to use to hold the tensor data for the state. The returned
/// buffer is owned by the state and so should not be freed by the caller. The
/// caller can and should fill the buffer with the state data. The buffer must
/// not be accessed by the backend after TRITONBACKEND_StateUpdate is called.
/// The caller should fill the buffer before calling TRITONBACKEND_StateUpdate.
///
/// \param state The state.
/// \param buffer Returns a pointer to a buffer where the contents of the state
/// should be placed.
/// \param buffer_byte_size The size, in bytes, of the buffer required
/// by the caller.
/// \param memory_type Acts as both input and output. On input gives
/// the buffer memory type preferred by the caller. Returns the
/// actual memory type of 'buffer'.
/// \param memory_type_id Acts as both input and output. On input
/// gives the buffer memory type id preferred by the caller. Returns
/// the actual memory type id of 'buffer'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateBuffer(
    TRITONBACKEND_State* state, void** buffer, const uint64_t buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Get the buffer attributes associated with the given state buffer.
/// The returned 'buffer_attributes' is owned by the state and so should not be
/// modified or freed by the caller. The lifetime of the 'buffer_attributes'
/// matches that of the state.
///
/// \param state The state.
/// \param buffer_attributes Returns the buffer attributes for the given state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateBufferAttributes(
    TRITONBACKEND_State* state,
    TRITONSERVER_BufferAttributes** buffer_attributes);
///
/// TRITONBACKEND_Backend
///
/// Object representing a backend.
///
/// TRITONBACKEND_ExecutionPolicy
///
/// Types of execution policy that can be implemented by a backend.
///
/// TRITONBACKEND_EXECUTION_BLOCKING: An instance of the model
/// blocks in TRITONBACKEND_ModelInstanceExecute until it is ready
/// to handle another inference. Upon returning from
/// TRITONBACKEND_ModelInstanceExecute, Triton may immediately
/// call TRITONBACKEND_ModelInstanceExecute for the same instance
/// to execute a new batch of requests. Thus, most backends using
/// this policy will not return from
/// TRITONBACKEND_ModelInstanceExecute until all responses have
/// been sent and all requests have been released. This is the
/// default execution policy.
///
/// TRITONBACKEND_EXECUTION_DEVICE_BLOCKING: An instance, A, of the
/// model blocks in TRITONBACKEND_ModelInstanceExecute if the
/// device associated with the instance is unable to handle
/// another inference. Even if another instance, B, associated
/// with the device, is available and ready to perform an
/// inference, Triton will not invoke
/// TRITONBACKEND_ModelInstanceExecute for B until A returns from
/// TRITONBACKEND_ModelInstanceExecute. Triton will not be blocked
/// from calling TRITONBACKEND_ModelInstanceExecute for instance
/// C, which is associated with a different device than A and B,
/// even if A or B has not returned from
/// TRITONBACKEND_ModelInstanceExecute. This execution policy is
/// typically used by a backend that can cooperatively execute
/// multiple model instances on the same device.
///
typedef enum TRITONBACKEND_execpolicy_enum {
  TRITONBACKEND_EXECUTION_BLOCKING,
  TRITONBACKEND_EXECUTION_DEVICE_BLOCKING
} TRITONBACKEND_ExecutionPolicy;
/// Get the name of the backend. The caller does not own the returned
/// string and must not modify or delete it. The lifetime of the
/// returned string extends only as long as 'backend'.
///
/// \param backend The backend.
/// \param name Returns the name of the backend.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendName(
    TRITONBACKEND_Backend* backend, const char** name);
/// Get the backend configuration. The 'backend_config' message is
/// owned by Triton and should not be modified or freed by the caller.
///
/// The backend configuration, as JSON, is:
///
/// {
/// "cmdline" : {
/// "<setting>" : "<value>",
/// ...
/// }
/// }
///
/// \param backend The backend.
/// \param backend_config Returns the backend configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendConfig(
    TRITONBACKEND_Backend* backend, TRITONSERVER_Message** backend_config);
/// Get the execution policy for this backend. By default the
/// execution policy is TRITONBACKEND_EXECUTION_BLOCKING.
///
/// \param backend The backend.
/// \param policy Returns the execution policy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy* policy);
/// Set the execution policy for this backend. By default the
/// execution policy is TRITONBACKEND_EXECUTION_BLOCKING. Triton reads
/// the backend's execution policy after calling
/// TRITONBACKEND_Initialize, so to be recognized changes to the
/// execution policy must be made in TRITONBACKEND_Initialize.
/// Also, note that if using sequence batcher for the model, Triton will
/// use TRITONBACKEND_EXECUTION_BLOCKING policy irrespective of the
/// policy specified by this setter function.
///
/// \param backend The backend.
/// \param policy The execution policy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendSetExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy policy);
/// Get the location of the files that make up the backend
/// implementation. This location contains the backend shared library
/// and any other files located with the shared library. The
/// 'location' communicated depends on how the backend is being
/// communicated to Triton as indicated by 'artifact_type'.
///
/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The backend artifacts are
/// made available to Triton via the local filesystem. 'location'
/// returns the full path to the directory containing this
/// backend's artifacts. The returned string is owned by Triton,
/// not the caller, and so should not be modified or freed.
///
/// \param backend The backend.
/// \param artifact_type Returns the artifact type for the backend.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendArtifacts(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location);
/// Get the memory manager associated with a backend.
///
/// \param backend The backend.
/// \param manager Returns the memory manager.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendMemoryManager(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_MemoryManager** manager);
/// Get the user-specified state associated with the backend. The
/// state is completely owned and managed by the backend.
///
/// \param backend The backend.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendState(
    TRITONBACKEND_Backend* backend, void** state);
/// Set the user-specified state associated with the backend. The
/// state is completely owned and managed by the backend.
///
/// \param backend The backend.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendSetState(
    TRITONBACKEND_Backend* backend, void* state);
///
/// TRITONBACKEND_Model
///
/// Object representing a model implemented using the backend.
///
/// Get the name of the model. The returned string is owned by the
/// model object, not the caller, and so should not be modified or
/// freed.
///
/// \param model The model.
/// \param name Returns the model name.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelName(
    TRITONBACKEND_Model* model, const char** name);
/// Get the version of the model.
///
/// \param model The model.
/// \param version Returns the model version.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelVersion(
    TRITONBACKEND_Model* model, uint64_t* version);
/// Get the location of the files that make up the model. The
/// 'location' communicated depends on how the model is being
/// communicated to Triton as indicated by 'artifact_type'.
///
/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The model artifacts are made
/// available to Triton via the local filesystem. 'location'
/// returns the full path to the directory in the model repository
/// that contains this model's artifacts. The returned string is
/// owned by Triton, not the caller, and so should not be modified
/// or freed.
///
/// \param model The model.
/// \param artifact_type Returns the artifact type for the model.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelRepository(
    TRITONBACKEND_Model* model, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location);
/// Get the model configuration. The caller takes ownership of the
/// message object and must call TRITONSERVER_MessageDelete to release
/// the object. The configuration is available via this call even
/// before the model is loaded and so can be used in
/// TRITONBACKEND_ModelInitialize. TRITONSERVER_ServerModelConfig
/// returns equivalent information but is not useable until after the
/// model loads.
///
/// \param model The model.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned. Currently only version 1 is supported.
/// \param model_config Returns the model configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message** model_config);
/// Whether the backend should attempt to auto-complete the model configuration.
/// If true, the model should fill the inputs, outputs, and max batch size in
/// the model configuration if incomplete. If the model configuration is
/// changed, the new configuration must be reported to Triton using
/// TRITONBACKEND_ModelSetConfig.
///
/// \param model The model.
/// \param auto_complete_config Returns whether the backend should auto-complete
/// the model configuration.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelAutoCompleteConfig(
    TRITONBACKEND_Model* model, bool* auto_complete_config);
/// Set the model configuration in Triton server. This API should only be called
/// when the backend implements the auto-completion of model configuration
/// and TRITONBACKEND_ModelAutoCompleteConfig returns true in
/// auto_complete_config. Only the inputs, outputs, max batch size, and
/// scheduling choice can be changed. A caveat being scheduling choice can only
/// be changed if none is previously set. Any other changes to the model
/// configuration will be ignored by Triton. This function can only be called
/// from TRITONBACKEND_ModelInitialize, calling in any other context will result
/// in an error being returned. Additionally, Triton server can add some of the
/// missing fields in the provided config with this call. The backend must get
/// the complete configuration again by using TRITONBACKEND_ModelConfig.
/// TRITONBACKEND_ModelSetConfig does not take ownership of the message object
/// and so the caller should call TRITONSERVER_MessageDelete to release the
/// object once the function returns.
///
/// \param model The model.
/// \param config_version The format version of the model configuration.
/// If the configuration is not represented in the version's format
/// then an error will be returned. Currently only version 1 is supported.
/// \param model_config The updated model configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelSetConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message* model_config);
/// Get the TRITONSERVER_Server object that this model is being served
/// by.
///
/// \param model The model.
/// \param server Returns the server.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelServer(
    TRITONBACKEND_Model* model, TRITONSERVER_Server** server);
/// Get the backend used by the model.
///
/// \param model The model.
/// \param backend Returns the backend object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelBackend(
    TRITONBACKEND_Model* model, TRITONBACKEND_Backend** backend);
/// Get the user-specified state associated with the model. The
/// state is completely owned and managed by the backend.
///
/// \param model The model.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelState(
    TRITONBACKEND_Model* model, void** state);
/// Set the user-specified state associated with the model. The
/// state is completely owned and managed by the backend.
///
/// \param model The model.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelSetState(
    TRITONBACKEND_Model* model, void* state);
///
/// TRITONBACKEND_ModelInstance
///
/// Object representing a model instance implemented using the
/// backend.
///
/// Get the name of the model instance. The returned string is owned by the
/// model object, not the caller, and so should not be modified or
/// freed.
///
/// \param instance The model instance.
/// \param name Returns the instance name.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceName(
    TRITONBACKEND_ModelInstance* instance, const char** name);
/// Get the kind of the model instance.
///
/// \param instance The model instance.
/// \param kind Returns the instance kind.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceKind(
    TRITONBACKEND_ModelInstance* instance,
    TRITONSERVER_InstanceGroupKind* kind);
/// Get the device ID of the model instance.
///
/// \param instance The model instance.
/// \param device_id Returns the instance device ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceDeviceId(
    TRITONBACKEND_ModelInstance* instance, int32_t* device_id);
/// Get the host policy setting. The 'host_policy' message is
/// owned by Triton and should not be modified or freed by the caller.
///
/// The host policy setting, as JSON, is:
///
/// {
/// "<host_policy>" : {
/// "<setting>" : "<value>",
/// ...
/// }
/// }
///
/// \param instance The model instance.
/// \param host_policy Returns the host policy setting as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceHostPolicy(
    TRITONBACKEND_ModelInstance* instance, TRITONSERVER_Message** host_policy);
/// Whether the model instance is passive.
///
/// \param instance The model instance.
/// \param is_passive Returns true if the instance is passive, false otherwise
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceIsPassive(
    TRITONBACKEND_ModelInstance* instance, bool* is_passive);
/// Get the number of optimization profiles to be loaded for the instance.
///
/// \param instance The model instance.
/// \param count Returns the number of optimization profiles.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count);
/// Get the name of an optimization profile. The caller does not own
/// the returned string and must not modify or delete it. The lifetime
/// of the returned string extends only as long as 'instance'.
///
/// \param instance The model instance.
/// \param index The index of the optimization profile. Must be 0
/// <= index < count, where count is the value returned by
/// TRITONBACKEND_ModelInstanceProfileCount.
/// \param profile_name Returns the name of the optimization profile
/// corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileName(
    TRITONBACKEND_ModelInstance* instance, const uint32_t index,
    const char** profile_name);
/// Get the number of secondary devices configured for the instance.
///
/// \param instance The model instance.
/// \param count Returns the number of secondary devices.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count);
/// Get the properties of an indexed secondary device. The returned
/// strings and other properties are owned by the instance, not the
/// caller, and so should not be modified or freed.
///
/// \param instance The model instance.
/// \param index The index of the secondary device. Must be 0
/// <= index < count, where count is the value returned by
/// TRITONBACKEND_ModelInstanceSecondaryDeviceCount.
/// \param kind Returns the kind of secondary device corresponding
/// to the index.
/// \param id Returns the id of secondary device corresponding to the index.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
    TRITONBACKEND_ModelInstance* instance, uint32_t index, const char** kind,
    int64_t* id);
/// Get the model associated with a model instance.
///
/// \param instance The model instance.
/// \param model Returns the model object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceModel(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Model** model);
/// Get the user-specified state associated with the model
/// instance. The state is completely owned and managed by the
/// backend.
///
/// \param instance The model instance.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceState(
    TRITONBACKEND_ModelInstance* instance, void** state);
/// Set the user-specified state associated with the model
/// instance. The state is completely owned and managed by the
/// backend.
///
/// \param instance The model instance.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceSetState(
    TRITONBACKEND_ModelInstance* instance, void* state);
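/// For illustration only (not part of this header), a backend would typically
/// attach its own per-instance object in TRITONBACKEND_ModelInstanceInitialize
/// and fetch it back later; 'InstanceState' is a hypothetical backend type and
/// error handling is omitted:
///
///   InstanceState* istate = new InstanceState();
///   TRITONBACKEND_ModelInstanceSetState(instance, istate);
///   // ... later, for example in TRITONBACKEND_ModelInstanceExecute ...
///   void* vstate;
///   TRITONBACKEND_ModelInstanceState(instance, &vstate);
///   InstanceState* fetched = reinterpret_cast<InstanceState*>(vstate);
///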
/// Record statistics for an inference request.
///
/// Set 'success' true to indicate that the inference request
/// completed successfully. In this case all timestamps should be
/// non-zero values reported in nanoseconds and should be collected
/// using std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
/// Set 'success' to false to indicate that the inference request failed
/// to complete successfully. In this case all timestamps values are
/// ignored.
///
/// For consistency of measurement across different backends, the
/// timestamps should be collected at the following points during
/// TRITONBACKEND_ModelInstanceExecute.
///
/// TRITONBACKEND_ModelInstanceExecute()
///   CAPTURE TIMESTAMP (exec_start_ns)
///   < process input tensors to prepare them for inference
///     execution, including copying the tensors to/from GPU if
///     necessary>
///   CAPTURE TIMESTAMP (compute_start_ns)
///   < perform inference computations to produce outputs >
///   CAPTURE TIMESTAMP (compute_end_ns)
///   < allocate output buffers and extract output tensors, including
///     copying the tensors to/from GPU if necessary>
///   CAPTURE TIMESTAMP (exec_end_ns)
/// return
///
/// Note that these statistics are associated with a valid
/// TRITONBACKEND_Request object and so must be reported before the
/// request is released. For backends that release the request before
/// all response(s) are sent, these statistics cannot capture
/// information about the time required to produce the response.
///
/// \param instance The model instance.
/// \param request The inference request that statistics are being
/// reported for.
/// \param success True if the inference request completed
/// successfully, false if it failed to complete.
/// \param exec_start_ns Timestamp for the start of execution.
/// \param compute_start_ns Timestamp for the start of execution
/// computations.
/// \param compute_end_ns Timestamp for the end of execution
/// computations.
/// \param exec_end_ns Timestamp for the end of execution.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportStatistics(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request* request,
    const bool success, const uint64_t exec_start_ns,
    const uint64_t compute_start_ns, const uint64_t compute_end_ns,
    const uint64_t exec_end_ns);
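/// For illustration only (not part of this header), a backend might capture
/// and report these timestamps as sketched below; NowNs() is a hypothetical
/// helper returning std::chrono::steady_clock::now().time_since_epoch()
/// converted to nanoseconds:
///
///   uint64_t exec_start_ns = NowNs();
///   // ... prepare input tensors ...
///   uint64_t compute_start_ns = NowNs();
///   // ... run inference ...
///   uint64_t compute_end_ns = NowNs();
///   // ... extract output tensors ...
///   uint64_t exec_end_ns = NowNs();
///   TRITONBACKEND_ModelInstanceReportStatistics(
///       instance, request, true /* success */, exec_start_ns,
///       compute_start_ns, compute_end_ns, exec_end_ns);
///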
/// Record statistics for the execution of an entire batch of
/// inference requests.
///
/// All timestamps should be non-zero values reported in nanoseconds
/// and should be collected using
/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
/// See TRITONBACKEND_ModelInstanceReportStatistics for more information about
/// the timestamps.
///
/// 'batch_size' is the sum of the batch sizes for the individual
/// requests that were delivered together in the call to
/// TRITONBACKEND_ModelInstanceExecute. For example, if three requests
/// are passed to TRITONBACKEND_ModelInstanceExecute and those
/// requests have batch size 1, 2, and 3; then 'batch_size' should be
/// set to 6.
///
/// \param instance The model instance.
/// \param batch_size Combined batch size of all the individual
/// requests executed in the batch.
/// \param exec_start_ns Timestamp for the start of execution.
/// \param compute_start_ns Timestamp for the start of execution
/// computations.
/// \param compute_end_ns Timestamp for the end of execution
/// computations.
/// \param exec_end_ns Timestamp for the end of execution.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportBatchStatistics(
    TRITONBACKEND_ModelInstance* instance, const uint64_t batch_size,
    const uint64_t exec_start_ns, const uint64_t compute_start_ns,
    const uint64_t compute_end_ns, const uint64_t exec_end_ns);
///
/// The following functions can be implemented by a backend. Functions
/// indicated as required must be implemented or the backend will fail
/// to load.
///
/// Initialize a backend. This function is optional, a backend is not
/// required to implement it. This function is called once when a
/// backend is loaded to allow the backend to initialize any state
/// associated with the backend. A backend has a single state that is
/// shared across all models that use the backend.
///
/// \param backend The backend.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Initialize(
    TRITONBACKEND_Backend* backend);
/// Finalize for a backend. This function is optional, a backend is
/// not required to implement it. This function is called once, just
/// before the backend is unloaded. All state associated with the
/// backend should be freed and any threads created for the backend
/// should be exited/joined before returning from this function.
///
/// \param backend The backend.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Finalize(
    TRITONBACKEND_Backend* backend);
/// Initialize for a model. This function is optional, a backend is
/// not required to implement it. This function is called once when a
/// model that uses the backend is loaded to allow the backend to
/// initialize any state associated with the model. The backend should
/// also examine the model configuration to determine if the
/// configuration is suitable for the backend. Any errors reported by
/// this function will prevent the model from loading.
///
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInitialize(
    TRITONBACKEND_Model* model);
/// Finalize for a model. This function is optional, a backend is not
/// required to implement it. This function is called once for a
/// model, just before the model is unloaded from Triton. All state
/// associated with the model should be freed and any threads created
/// for the model should be exited/joined before returning from this
/// function.
///
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelFinalize(
    TRITONBACKEND_Model* model);
/// Initialize for a model instance. This function is optional, a
/// backend is not required to implement it. This function is called
/// once when a model instance is created to allow the backend to
/// initialize any state associated with the instance.
///
/// \param instance The model instance.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceInitialize(
    TRITONBACKEND_ModelInstance* instance);
/// Finalize for a model instance. This function is optional, a
/// backend is not required to implement it. This function is called
/// once for an instance, just before the corresponding model is
/// unloaded from Triton. All state associated with the instance
/// should be freed and any threads created for the instance should be
/// exited/joined before returning from this function.
///
/// \param instance The model instance.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceFinalize(
    TRITONBACKEND_ModelInstance* instance);
/// Execute a batch of one or more requests on a model instance. This
/// function is required. Triton will not perform multiple
/// simultaneous calls to this function for a given model 'instance';
/// however, there may be simultaneous calls for different model
/// instances (for the same or different models).
///
/// If an error is returned the ownership of the request objects
/// remains with Triton and the backend must not retain references to
/// the request objects or access them in any way.
///
/// If success is returned, ownership of the request objects is
/// transferred to the backend and it is then responsible for creating
/// responses and releasing the request objects. Note that even though
/// ownership of the request objects is transferred to the backend, the
/// ownership of the buffer holding request pointers is returned back
/// to Triton upon return from TRITONBACKEND_ModelInstanceExecute. If
/// any request objects need to be maintained beyond
/// TRITONBACKEND_ModelInstanceExecute, then the pointers must be copied
/// out of the array within TRITONBACKEND_ModelInstanceExecute.
///
/// \param instance The model instance.
/// \param requests The requests.
/// \param request_count The number of requests in the batch.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count);
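/// For illustration only (not part of this header), a minimal well-formed
/// execute implementation takes ownership of each request, produces its
/// response(s), and then releases the request; response creation is elided,
/// and TRITONBACKEND_RequestRelease / TRITONSERVER_REQUEST_RELEASE_ALL come
/// from other parts of these Triton headers:
///
///   TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(
///       TRITONBACKEND_ModelInstance* instance,
///       TRITONBACKEND_Request** requests, const uint32_t request_count)
///   {
///     for (uint32_t r = 0; r < request_count; ++r) {
///       // ... create and send response(s) for requests[r] ...
///       TRITONBACKEND_RequestRelease(
///           requests[r], TRITONSERVER_REQUEST_RELEASE_ALL);
///     }
///     return nullptr;  // success
///   }
///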
/// Query the backend for different model attributes. This function is
/// optional; a backend is not required to implement it, nor is it required
/// to set every backend attribute listed. This function is called when
/// Triton requires further backend / model information to perform operations.
/// This function may be called multiple times within the lifetime of the
/// backend (between TRITONBACKEND_Initialize and TRITONBACKEND_Finalize).
/// The backend may return an error to indicate failure to set the backend
/// attributes, in which case the attributes specified in the same function
/// call will be ignored. Triton will apply the specified attributes only if
/// 'nullptr' (success) is returned.
///
/// \param backend The backend.
/// \param backend_attributes Return the backend attribute.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_GetBackendAttribute(
    TRITONBACKEND_Backend* backend,
    TRITONBACKEND_BackendAttribute* backend_attributes);
/// TRITONBACKEND_BackendAttribute
///
/// API to modify attributes associated with a backend.
///
/// Add the preferred instance group of the backend. This function
/// can be called multiple times to cover the different instance group kinds
/// that the backend supports, in priority order: the first call describes
/// the most preferred group. When instance groups are not explicitly
/// provided, Triton will use this attribute to create a model deployment
/// that better aligns with the backend's preference.
///
/// \param backend_attributes The backend attributes object.
/// \param kind The kind of the instance group.
/// \param count The number of instances per device. The Triton default will
/// be used if 0 is provided.
/// \param device_ids The devices where instances should be available. The
/// Triton default will be used if 'nullptr' is provided.
/// \param id_count The number of devices in 'device_ids'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup(
    TRITONBACKEND_BackendAttribute* backend_attributes,
    const TRITONSERVER_InstanceGroupKind kind, const uint64_t count,
    const uint64_t* device_ids, const uint64_t id_count);
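/// For illustration only (not part of this header), a backend that prefers a
/// single GPU instance per device could report that preference like this:
///
///   TRITONSERVER_Error* TRITONBACKEND_GetBackendAttribute(
///       TRITONBACKEND_Backend* backend,
///       TRITONBACKEND_BackendAttribute* backend_attributes)
///   {
///     // Most preferred: GPU instances, one per device, on the default devices.
///     return TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup(
///         backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_GPU,
///         1 /* count */, nullptr /* device_ids */, 0 /* id_count */);
///   }
///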
#ifdef __cplusplus
}
#endif
3rdparty/core-r22.12/include/triton/core/tritonrepoagent.h deleted 100644 → 0
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include "triton/core/tritonserver.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _COMPILING_TRITONREPOAGENT
#if defined(_MSC_VER)
#define TRITONREPOAGENT_DECLSPEC __declspec(dllexport)
#define TRITONREPOAGENT_ISPEC __declspec(dllimport)
#elif defined(__GNUC__)
#define TRITONREPOAGENT_DECLSPEC __attribute__((__visibility__("default")))
#define TRITONREPOAGENT_ISPEC
#else
#define TRITONREPOAGENT_DECLSPEC
#define TRITONREPOAGENT_ISPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONREPOAGENT_DECLSPEC __declspec(dllimport)
#define TRITONREPOAGENT_ISPEC __declspec(dllexport)
#else
#define TRITONREPOAGENT_DECLSPEC
#define TRITONREPOAGENT_ISPEC
#endif
#endif
struct TRITONREPOAGENT_Agent;
struct TRITONREPOAGENT_AgentModel;
///
/// TRITONREPOAGENT API Version
///
/// The TRITONREPOAGENT API is versioned with major and minor version
/// numbers. Any change to the API that does not impact backwards
/// compatibility (for example, adding a non-required function)
/// increases the minor version number. Any change that breaks
/// backwards compatibility (for example, deleting or changing the
/// behavior of a function) increases the major version number. A
/// repository agent should check that the API version used to compile
/// the agent is compatible with the API version of the Triton server
/// that it is running in. This is typically done by code similar to
/// the following which makes sure that the major versions are equal
/// and that the minor version of Triton is >= the minor version used
/// to build the agent.
///
/// uint32_t api_version_major, api_version_minor;
/// TRITONREPOAGENT_ApiVersion(&api_version_major, &api_version_minor);
/// if ((api_version_major != TRITONREPOAGENT_API_VERSION_MAJOR) ||
/// (api_version_minor < TRITONREPOAGENT_API_VERSION_MINOR)) {
/// return TRITONSERVER_ErrorNew(
/// TRITONSERVER_ERROR_UNSUPPORTED,
/// "triton repository agent API version does not support this agent");
/// }
///
#define TRITONREPOAGENT_API_VERSION_MAJOR 0
#define TRITONREPOAGENT_API_VERSION_MINOR 1
/// Get the TRITONREPOAGENT API version supported by Triton. This
/// value can be compared against the
/// TRITONREPOAGENT_API_VERSION_MAJOR and
/// TRITONREPOAGENT_API_VERSION_MINOR used to build the agent to
/// ensure that Triton is compatible with the agent.
///
/// \param major Returns the TRITONREPOAGENT API major version supported
/// by Triton.
/// \param minor Returns the TRITONREPOAGENT API minor version supported
/// by Triton.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ApiVersion(
    uint32_t* major, uint32_t* minor);
/// TRITONREPOAGENT_ArtifactType
///
/// The ways that the files that make up a model's repository content
/// are communicated between Triton and the agent.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
/// communicated to and from the repository agent via a locally
/// accessible filesystem. The agent can access these files using
/// an appropriate filesystem API.
///
/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
/// communicated to and from the repository agent via a remote filesystem.
/// The remote filesystem path follows the same convention as is used for
/// repository paths, for example, "s3://" prefix indicates an S3 path.
///
typedef enum TRITONREPOAGENT_artifacttype_enum {
  TRITONREPOAGENT_ARTIFACT_FILESYSTEM,
  TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM
} TRITONREPOAGENT_ArtifactType;
/// TRITONREPOAGENT_ActionType
///
/// Types of repository actions that can be handled by an agent.
/// The lifecycle of a TRITONREPOAGENT_AgentModel begins with a call to
/// TRITONREPOAGENT_ModelInitialize and ends with a call to
/// TRITONREPOAGENT_ModelFinalize. Between those calls the current lifecycle
/// state of the model is communicated by calls to TRITONREPOAGENT_ModelAction.
/// Possible lifecycles are:
///
/// LOAD -> LOAD_COMPLETE -> UNLOAD -> UNLOAD_COMPLETE
/// LOAD -> LOAD_FAIL
///
/// TRITONREPOAGENT_ACTION_LOAD: A model is being loaded.
///
/// TRITONREPOAGENT_ACTION_LOAD_COMPLETE: The model load completed
/// successfully and the model is now loaded.
///
/// TRITONREPOAGENT_ACTION_LOAD_FAIL: The model load did not complete
/// successfully. The model is not loaded.
///
/// TRITONREPOAGENT_ACTION_UNLOAD: The model is being unloaded.
///
/// TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete.
///
typedef enum TRITONREPOAGENT_actiontype_enum {
  TRITONREPOAGENT_ACTION_LOAD,
  TRITONREPOAGENT_ACTION_LOAD_COMPLETE,
  TRITONREPOAGENT_ACTION_LOAD_FAIL,
  TRITONREPOAGENT_ACTION_UNLOAD,
  TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE
} TRITONREPOAGENT_ActionType;
/// Get the location of the files that make up the model. The
/// 'location' communicated depends on how the model is being
/// communicated to the agent as indicated by 'artifact_type'.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
/// made available to the agent via the local
/// filesystem. 'location' returns the full path to the directory
/// in the model repository that contains the model's
/// artifacts. The returned location string is owned by Triton,
/// not the caller, and so should not be modified or freed. The
/// contents of the directory are owned by Triton, not the agent,
/// and so the agent should not delete or modify the contents. Use
/// TRITONREPOAGENT_RepositoryAcquire to get a location that can be
/// used to modify the model repository contents.
///
/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
/// made available to the agent via a remote filesystem.
/// 'location' returns the full path to the remote directory that contains
/// the model's artifacts. The returned location string is owned by Triton,
/// not the caller, and so should not be modified or freed. The contents of
/// the remote directory are owned by Triton, not the agent,
/// and so the agent should not delete or modify the contents.
/// Use TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location
/// that can be used to write updated model repository contents.
///
/// \param agent The agent.
/// \param model The model.
/// \param artifact_type Returns the artifact type for the location.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryLocation(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    TRITONREPOAGENT_ArtifactType* artifact_type, const char** location);
/// Acquire a location where the agent can produce a new version of
/// the model repository files. This is a convenience method to create
/// a temporary directory for the agent. The agent is responsible for
/// calling TRITONREPOAGENT_ModelRepositoryLocationDelete in
/// TRITONREPOAGENT_ModelFinalize to delete the location. Initially the
/// acquired location is empty. The 'location' communicated depends on
/// the requested 'artifact_type'.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The location is a directory
/// on the local filesystem. 'location' returns the full path to
/// an empty directory that the agent should populate with the
/// model's artifacts. The returned location string is owned by
/// Triton, not the agent, and so should not be modified or freed.
///
/// \param agent The agent.
/// \param model The model.
/// \param artifact_type The artifact type for the location.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryLocationAcquire(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ArtifactType artifact_type, const char** location);
/// Discard and release ownership of a previously acquired location
/// and its contents. The agent must not access or modify the location
/// or its contents after this call.
///
/// \param agent The agent.
/// \param model The model.
/// \param path The location to release.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryLocationRelease(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const char* location);
/// Inform Triton that the specified repository location should be used for
/// the model in place of the original model repository. This method can only be
/// called when TRITONREPOAGENT_ModelAction is invoked with
/// TRITONREPOAGENT_ACTION_LOAD. The 'location'
/// communicated depends on how the repository is being
/// communicated to Triton as indicated by 'artifact_type'.
///
/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
/// made available to Triton via the local filesystem. 'location' returns
/// the full path to the directory. Ownership of the contents of the
/// returned directory is transferred to Triton and the agent must not
/// modify or free the contents until TRITONREPOAGENT_ModelFinalize.
/// The local filesystem directory can be created using
/// TRITONREPOAGENT_ModelRepositoryLocationAcquire or the agent can use
/// its own local filesystem API.
///
/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
/// made available to Triton via a remote filesystem. 'location' returns
/// the full path to the remote filesystem directory. Ownership of the
/// contents of the returned directory is transferred to Triton and
/// the agent must not modify or free the contents until
/// TRITONREPOAGENT_ModelFinalize.
///
/// \param agent The agent.
/// \param model The model.
/// \param artifact_type The artifact type for the location.
/// \param path Returns the location.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelRepositoryUpdate(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ArtifactType artifact_type, const char* location);
/// Get the number of agent parameters defined for a model.
///
/// \param agent The agent.
/// \param model The model.
/// \param count Returns the number of parameters.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelParameterCount(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    uint32_t* count);
/// Get a parameter name and value. The caller does not own the
/// returned strings and must not modify or delete them.
///
/// \param agent The agent.
/// \param model The model.
/// \param index The index of the parameter. Must be 0 <= index <
/// count, where count is the value returned by
/// TRITONREPOAGENT_ModelParameterCount.
/// \param parameter_name Returns the name of the parameter.
/// \param parameter_value Returns the value of the parameter.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelParameter(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const uint32_t index, const char** parameter_name,
    const char** parameter_value);
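/// For illustration only (not part of this header), an agent can enumerate the
/// parameters passed to it like this; error handling is omitted:
///
///   uint32_t count;
///   TRITONREPOAGENT_ModelParameterCount(agent, model, &count);
///   for (uint32_t i = 0; i < count; ++i) {
///     const char* name;
///     const char* value;
///     TRITONREPOAGENT_ModelParameter(agent, model, i, &name, &value);
///     // ... use 'name' and 'value'; do not modify or free them ...
///   }
///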
/// Get the model configuration. The caller takes ownership of the
/// message object and must call TRITONSERVER_MessageDelete to release
/// the object. If the model repository does not contain a
/// config.pbtxt file then 'model_config' is returned as nullptr.
///
/// \param agent The agent.
/// \param model The model.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned. Currently only version 1 is supported.
/// \param model_config Returns the model configuration as a message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelConfig(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const uint32_t config_version, TRITONSERVER_Message** model_config);
/// Get the user-specified state associated with the model.
///
/// \param model The agent model.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelState(
    TRITONREPOAGENT_AgentModel* model, void** state);
/// Set the user-specified state associated with the model.
///
/// \param model The agent model.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelSetState(
    TRITONREPOAGENT_AgentModel* model, void* state);
/// Get the user-specified state associated with the agent.
///
/// \param agent The agent.
/// \param state Returns the user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_State(
    TRITONREPOAGENT_Agent* agent, void** state);
/// Set the user-specified state associated with the agent.
///
/// \param agent The agent.
/// \param state The user state, or nullptr if no user state.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_SetState(
    TRITONREPOAGENT_Agent* agent, void* state);
///
/// The following functions can be implemented by an agent. Functions
/// indicated as required must be implemented or the agent will fail
/// to load.
///
/// Initialize an agent. This function is optional. This function is
/// called once when an agent is loaded to allow the agent to
/// initialize any state associated with the agent. An agent has a
/// single state that is shared across all invocations of the agent.
///
/// \param agent The agent.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_Initialize(
    TRITONREPOAGENT_Agent* agent);
/// Finalize for an agent. This function is optional. This function is
/// called once, just before the agent is unloaded. All state
/// associated with the agent should be freed and any threads created
/// for the agent should be exited/joined before returning from this
/// function.
///
/// \param agent The agent.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_Finalize(
    TRITONREPOAGENT_Agent* agent);
/// Initialize a model associated with an agent. This function is optional.
/// This function is called once when an agent model's lifecycle begins to allow
/// the agent model to initialize any state associated with it. An agent model
/// has a single state that is shared across the entire lifecycle of the agent
/// model.
///
/// \param agent The agent to be associated with the model.
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelInitialize(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model);
/// Finalize for a model. This function is optional. This function is
/// called once, just before the end of the agent model's lifecycle. All state
/// associated with the agent model should be freed and any threads created
/// for the agent model should be exited/joined before returning from this
/// function. If the model acquired a model location using
/// TRITONREPOAGENT_ModelRepositoryLocationAcquire, it must call
/// TRITONREPOAGENT_ModelRepositoryLocationRelease to release that location.
///
/// \param agent The agent associated with the model.
/// \param model The model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelFinalize(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model);
/// Handle an action for a specified model. This function is
/// required. Triton will not perform multiple simultaneous calls to
/// this function for a given agent and model; however, there may be
/// simultaneous calls for the agent for different models.
///
/// If the agent does not handle the action the agent should
/// immediately return success (nullptr).
///
/// Any modification to the model's repository must be made when 'action_type'
/// is TRITONREPOAGENT_ACTION_LOAD.
/// To modify the model's repository the agent must either acquire a mutable
/// location via TRITONREPOAGENT_ModelRepositoryLocationAcquire
/// or its own managed location, report the location to Triton via
/// TRITONREPOAGENT_ModelRepositoryUpdate, and then return
/// success (nullptr). If the agent does not need to make any changes
/// to the model repository it should not call
/// TRITONREPOAGENT_ModelRepositoryUpdate and then return success.
/// To indicate that a model load should fail return a non-success status.
///
/// \param agent The agent.
/// \param model The model that is the target of the action.
/// \param action_type The type of action the agent should handle for the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelAction(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ActionType action_type);
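/// For illustration only (not part of this header), an agent that only
/// inspects models at load time could implement the required hook as:
///
///   TRITONSERVER_Error* TRITONREPOAGENT_ModelAction(
///       TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
///       const TRITONREPOAGENT_ActionType action_type)
///   {
///     if (action_type != TRITONREPOAGENT_ACTION_LOAD) {
///       return nullptr;  // action not handled, report success
///     }
///     TRITONREPOAGENT_ArtifactType artifact_type;
///     const char* location;
///     TRITONREPOAGENT_ModelRepositoryLocation(
///         agent, model, &artifact_type, &location);
///     // ... inspect 'location'; return a TRITONSERVER_Error to fail the load.
///     return nullptr;
///   }
///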
#ifdef __cplusplus
}
#endif
3rdparty/core-r22.12/include/triton/core/tritonserver.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
/// \file
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _COMPILING_TRITONSERVER
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONSERVER_DECLSPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllimport)
#else
#define TRITONSERVER_DECLSPEC
#endif
#endif
struct TRITONSERVER_BufferAttributes;
struct TRITONSERVER_Error;
struct TRITONSERVER_InferenceRequest;
struct TRITONSERVER_InferenceResponse;
struct TRITONSERVER_InferenceTrace;
struct TRITONSERVER_Message;
struct TRITONSERVER_Metrics;
struct TRITONSERVER_Parameter;
struct TRITONSERVER_ResponseAllocator;
struct TRITONSERVER_Server;
struct TRITONSERVER_ServerOptions;
struct TRITONSERVER_Metric;
struct TRITONSERVER_MetricFamily;
///
/// TRITONSERVER API Version
///
/// The TRITONSERVER API is versioned with major and minor version
/// numbers. Any change to the API that does not impact backwards
/// compatibility (for example, adding a non-required function)
/// increases the minor version number. Any change that breaks
/// backwards compatibility (for example, deleting or changing the
/// behavior of a function) increases the major version number. A
/// client should check that the API version used to compile the
/// client is compatible with the API version of the Triton shared
/// library that it is linking against. This is typically done by code
/// similar to the following which makes sure that the major versions
/// are equal and that the minor version of the Triton shared library
/// is >= the minor version used to build the client.
///
/// uint32_t api_version_major, api_version_minor;
/// TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor);
/// if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) ||
/// (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) {
/// return TRITONSERVER_ErrorNew(
/// TRITONSERVER_ERROR_UNSUPPORTED,
/// "triton server API version does not support this client");
/// }
///
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 17
/// Get the TRITONSERVER API version supported by the Triton shared
/// library. This value can be compared against the
/// TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR
/// used to build the client to ensure that Triton shared library is
/// compatible with the client.
///
/// \param major Returns the TRITONSERVER API major version supported
/// by Triton.
/// \param minor Returns the TRITONSERVER API minor version supported
/// by Triton.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ApiVersion(
    uint32_t* major, uint32_t* minor);
/// TRITONSERVER_DataType
///
/// Tensor data types recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_datatype_enum {
  TRITONSERVER_TYPE_INVALID,
  TRITONSERVER_TYPE_BOOL,
  TRITONSERVER_TYPE_UINT8,
  TRITONSERVER_TYPE_UINT16,
  TRITONSERVER_TYPE_UINT32,
  TRITONSERVER_TYPE_UINT64,
  TRITONSERVER_TYPE_INT8,
  TRITONSERVER_TYPE_INT16,
  TRITONSERVER_TYPE_INT32,
  TRITONSERVER_TYPE_INT64,
  TRITONSERVER_TYPE_FP16,
  TRITONSERVER_TYPE_FP32,
  TRITONSERVER_TYPE_FP64,
  TRITONSERVER_TYPE_BYTES,
  TRITONSERVER_TYPE_BF16
} TRITONSERVER_DataType;
/// Get the string representation of a data type. The returned string
/// is not owned by the caller and so should not be modified or freed.
///
/// \param datatype The data type.
/// \return The string representation of the data type.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_DataTypeString(
    TRITONSERVER_DataType datatype);
/// Get the Triton datatype corresponding to a string representation
/// of a datatype.
///
/// \param dtype The datatype string representation.
/// \return The Triton data type or TRITONSERVER_TYPE_INVALID if the
/// string does not represent a data type.
TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType(
    const char* dtype);
/// Get the size of a Triton datatype in bytes. Zero is returned for
/// TRITONSERVER_TYPE_BYTES because it has a variable size. Zero is
/// returned for TRITONSERVER_TYPE_INVALID.
///
/// \param dtype The datatype.
/// \return The size of the datatype.
TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize(
    TRITONSERVER_DataType datatype);
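/// For illustration only (not part of this header), the byte size of a dense
/// tensor with a fixed-size datatype follows from its shape:
///
///   int64_t shape[2] = {8, 128};
///   uint64_t byte_size = TRITONSERVER_DataTypeByteSize(TRITONSERVER_TYPE_FP32);
///   for (int i = 0; i < 2; ++i) {
///     byte_size *= shape[i];
///   }
///   // byte_size == 8 * 128 * 4 == 4096
///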
/// TRITONSERVER_MemoryType
///
/// Types of memory recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_memorytype_enum {
  TRITONSERVER_MEMORY_CPU,
  TRITONSERVER_MEMORY_CPU_PINNED,
  TRITONSERVER_MEMORY_GPU
} TRITONSERVER_MemoryType;
/// Get the string representation of a memory type. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param memtype The memory type.
/// \return The string representation of the memory type.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_MemoryTypeString(
    TRITONSERVER_MemoryType memtype);
/// TRITONSERVER_ParameterType
///
/// Types of parameters recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_parametertype_enum {
  TRITONSERVER_PARAMETER_STRING,
  TRITONSERVER_PARAMETER_INT,
  TRITONSERVER_PARAMETER_BOOL,
  TRITONSERVER_PARAMETER_BYTES
} TRITONSERVER_ParameterType;
/// Get the string representation of a parameter type. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param paramtype The parameter type.
/// \return The string representation of the parameter type.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_ParameterTypeString(
    TRITONSERVER_ParameterType paramtype);
/// Create a new parameter object. The caller takes ownership of the
/// TRITONSERVER_Parameter object and must call TRITONSERVER_ParameterDelete to
/// release the object. The object will maintain its own copy of the 'value'
///
/// \param name The parameter name.
/// \param type The parameter type.
/// \param value The pointer to the value.
/// \return A new TRITONSERVER_Parameter object. 'nullptr' will be returned if
/// 'type' is 'TRITONSERVER_PARAMETER_BYTES'. The caller should use
/// TRITONSERVER_ParameterBytesNew to create a parameter with bytes type.
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterNew(
    const char* name, const TRITONSERVER_ParameterType type,
    const void* value);
/// Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES.
/// The caller takes ownership of the TRITONSERVER_Parameter object and must
/// call TRITONSERVER_ParameterDelete to release the object. The object only
/// maintains a shallow copy of the 'byte_ptr' so the data content must be
/// valid until the parameter object is deleted.
///
/// \param name The parameter name.
/// \param byte_ptr The pointer to the data content.
/// \param size The size of the data content.
/// \return A new TRITONSERVER_Parameter object.
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterBytesNew(
    const char* name, const void* byte_ptr, const uint64_t size);
/// Delete a parameter object.
///
/// \param parameter The parameter object.
TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete(
    TRITONSERVER_Parameter* parameter);
/// TRITONSERVER_InstanceGroupKind
///
/// Kinds of instance groups recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_instancegroupkind_enum {
  TRITONSERVER_INSTANCEGROUPKIND_AUTO,
  TRITONSERVER_INSTANCEGROUPKIND_CPU,
  TRITONSERVER_INSTANCEGROUPKIND_GPU,
  TRITONSERVER_INSTANCEGROUPKIND_MODEL
} TRITONSERVER_InstanceGroupKind;
/// Get the string representation of an instance-group kind. The
/// returned string is not owned by the caller and so should not be
/// modified or freed.
///
/// \param kind The instance-group kind.
/// \return The string representation of the kind.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_InstanceGroupKindString(
    TRITONSERVER_InstanceGroupKind kind);
/// TRITONSERVER_Logging
///
/// Types/levels of logging.
///
typedef enum TRITONSERVER_loglevel_enum {
  TRITONSERVER_LOG_INFO,
  TRITONSERVER_LOG_WARN,
  TRITONSERVER_LOG_ERROR,
  TRITONSERVER_LOG_VERBOSE
} TRITONSERVER_LogLevel;
///
/// Format of logging.
///
/// TRITONSERVER_LOG_DEFAULT: the log severity (L) and timestamp will be
/// logged as "LMMDD hh:mm:ss.ssssss".
///
/// TRITONSERVER_LOG_ISO8601: the log format will be "YYYY-MM-DDThh:mm:ssZ L".
///
typedef enum TRITONSERVER_logformat_enum {
  TRITONSERVER_LOG_DEFAULT,
  TRITONSERVER_LOG_ISO8601
} TRITONSERVER_LogFormat;
/// Is a log level enabled?
///
/// \param level The log level.
/// \return True if the log level is enabled, false if not enabled.
TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled(
    TRITONSERVER_LogLevel level);
/// Log a message at a given log level if that level is enabled.
///
/// \param level The log level.
/// \param filename The file name of the location of the log message.
/// \param line The line number of the log message.
/// \param msg The log message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_LogMessage(
    TRITONSERVER_LogLevel level, const char* filename, const int line,
    const char* msg);
/// TRITONSERVER_Error
///
/// Errors are reported by a TRITONSERVER_Error object. A NULL
/// TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error
/// indicates error and the code and message for the error can be
/// retrieved from the object.
///
/// The caller takes ownership of a TRITONSERVER_Error object returned by
/// the API and must call TRITONSERVER_ErrorDelete to release the object.
///
/// The TRITONSERVER_Error error codes
typedef enum TRITONSERVER_errorcode_enum {
  TRITONSERVER_ERROR_UNKNOWN,
  TRITONSERVER_ERROR_INTERNAL,
  TRITONSERVER_ERROR_NOT_FOUND,
  TRITONSERVER_ERROR_INVALID_ARG,
  TRITONSERVER_ERROR_UNAVAILABLE,
  TRITONSERVER_ERROR_UNSUPPORTED,
  TRITONSERVER_ERROR_ALREADY_EXISTS
} TRITONSERVER_Error_Code;
/// Create a new error object. The caller takes ownership of the
/// TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to
/// release the object.
///
/// \param code The error code.
/// \param msg The error message.
/// \return A new TRITONSERVER_Error object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ErrorNew(
    TRITONSERVER_Error_Code code, const char* msg);
/// Delete an error object.
///
/// \param error The error object.
TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete(TRITONSERVER_Error* error);
/// Get the error code.
///
/// \param error The error object.
/// \return The error code.
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(
    TRITONSERVER_Error* error);
/// Get the string representation of an error code. The returned
/// string is not owned by the caller and so should not be modified or
/// freed. The lifetime of the returned string extends only as long as
/// 'error' and must not be accessed once 'error' is deleted.
///
/// \param error The error object.
/// \return The string representation of the error code.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_ErrorCodeString(
    TRITONSERVER_Error* error);
/// Get the error message. The returned string is not owned by the
/// caller and so should not be modified or freed. The lifetime of the
/// returned string extends only as long as 'error' and must not be
/// accessed once 'error' is deleted.
///
/// \param error The error object.
/// \return The error message.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_ErrorMessage(
    TRITONSERVER_Error* error);
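/// For illustration only (not part of this header), a common calling pattern
/// is to check the returned error, log its message, and delete it:
///
///   uint32_t major, minor;
///   TRITONSERVER_Error* err = TRITONSERVER_ApiVersion(&major, &minor);
///   if (err != nullptr) {
///     TRITONSERVER_LogMessage(
///         TRITONSERVER_LOG_ERROR, __FILE__, __LINE__,
///         TRITONSERVER_ErrorMessage(err));
///     TRITONSERVER_ErrorDelete(err);
///   }
///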
/// TRITONSERVER_ResponseAllocator
///
/// Object representing a memory allocator for output tensors in an
/// inference response.
///
/// Type for allocation function that allocates a buffer to hold an
/// output tensor.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param tensor_name The name of the output tensor to allocate for.
/// \param byte_size The size of the buffer to allocate.
/// \param memory_type The type of memory that the caller prefers for
/// the buffer allocation.
/// \param memory_type_id The ID of the memory that the caller prefers
/// for the buffer allocation.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param buffer Returns a pointer to the allocated memory.
/// \param buffer_userp Returns a user-specified value to associate
/// with the buffer, or nullptr if no user-specified value should be
/// associated with the buffer. This value will be provided in the
/// call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer
/// is released and will also be returned by
/// TRITONSERVER_InferenceResponseOutput.
/// \param actual_memory_type Returns the type of memory where the
/// allocation resides. May be different than the type of memory
/// requested by 'memory_type'.
/// \param actual_memory_type_id Returns the ID of the memory where
/// the allocation resides. May be different than the ID of the memory
/// requested by 'memory_type_id'.
/// \return a TRITONSERVER_Error object if a failure occurs while
/// attempting an allocation. If an error is returned all other return
/// values will be ignored.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorAllocFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, void* userp, void** buffer, void** buffer_userp,
    TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id);
/// Type for allocation function that allocates a buffer to hold an
/// output tensor with buffer attributes. The callback function must fill in the
/// appropriate buffer attributes information related to this buffer. If set,
/// this function is always called after the
/// TRITONSERVER_ResponseAllocatorAllocFn_t function.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param tensor_name The name of the output tensor to allocate for.
/// \param buffer_attributes The buffer attributes associated with the buffer.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param buffer_userp Returns a user-specified value to associate
/// with the buffer, or nullptr if no user-specified value should be
/// associated with the buffer. This value will be provided in the
/// call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer
/// is released and will also be returned by
/// TRITONSERVER_InferenceResponseOutput.
/// \return a TRITONSERVER_Error object if a failure occurs while
/// attempting an allocation. If an error is returned all other return
/// values will be ignored.
typedef TRITONSERVER_Error* (
    *TRITONSERVER_ResponseAllocatorBufferAttributesFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
    void* buffer_userp);
/// Type for function that is called to query the allocator's preferred memory
/// type and memory type ID. As much as possible, the allocator should attempt
/// to return the same memory_type and memory_type_id values that will be
/// returned by the subsequent call to TRITONSERVER_ResponseAllocatorAllocFn_t.
/// But the allocator is not required to do so.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param tensor_name The name of the output tensor. This is optional
/// and it should be set to nullptr to indicate that the tensor name has
/// not been determined.
/// \param byte_size The expected size of the buffer. This is optional
/// and it should be set to nullptr to indicate that the byte size has
/// not been determined.
/// \param memory_type Acts as both input and output. On input gives
/// the memory type preferred by the caller. Returns the memory type preferred
/// by the allocator, taking into account the caller's preferred type.
/// \param memory_type_id Acts as both input and output. On input gives
/// the memory type ID preferred by the caller. Returns the memory type ID
/// preferred by the allocator, taking into account the caller's preferred
/// type ID.
/// \return a TRITONSERVER_Error object if a failure occurs.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorQueryFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
/// Type for function that is called when the server no longer holds
/// any reference to a buffer allocated by
/// TRITONSERVER_ResponseAllocatorAllocFn_t. In practice this function
/// is typically called when the response object associated with the
/// buffer is deleted by TRITONSERVER_InferenceResponseDelete.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param buffer Pointer to the buffer to be freed.
/// \param buffer_userp The user-specified value associated
/// with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t.
/// \param byte_size The size of the buffer.
/// \param memory_type The type of memory holding the buffer.
/// \param memory_type_id The ID of the memory holding the buffer.
/// \return a TRITONSERVER_Error object if a failure occurs while
/// attempting the release. If an error is returned Triton will not
/// attempt to release the buffer again.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorReleaseFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id);
/// Type for function that is called to indicate that subsequent
/// allocation requests will refer to a new response.
///
/// \param allocator The allocator that is provided in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \param userp The user data pointer that is provided as
/// 'response_allocator_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
/// \return a TRITONSERVER_Error object if a failure occurs.
typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorStartFn_t)(
    TRITONSERVER_ResponseAllocator* allocator, void* userp);
/// Create a new response allocator object.
///
/// The response allocator object is used by Triton to allocate
/// buffers to hold the output tensors in inference responses. Most
/// models generate a single response for each inference request
/// (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of
/// callbacks will be:
///
/// TRITONSERVER_ServerInferAsync called
/// - start_fn : optional (and typically not required)
/// - alloc_fn : called once for each output tensor in response
/// TRITONSERVER_InferenceResponseDelete called
/// - release_fn: called once for each output tensor in response
///
/// For models that generate multiple responses for each inference
/// request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be
/// used to determine sets of alloc_fn callbacks that belong to the
/// same response:
///
/// TRITONSERVER_ServerInferAsync called
/// - start_fn
/// - alloc_fn : called once for each output tensor in response
/// - start_fn
/// - alloc_fn : called once for each output tensor in response
/// ...
/// For each response, TRITONSERVER_InferenceResponseDelete called
/// - release_fn: called once for each output tensor in the response
///
/// In all cases the start_fn, alloc_fn and release_fn callback
/// functions must be thread-safe. Typically making these functions
/// thread-safe does not require explicit locking. The recommended way
/// to implement these functions is to have each inference request
/// provide a 'response_allocator_userp' object that is unique to that
/// request with TRITONSERVER_InferenceRequestSetResponseCallback. The
/// callback functions then operate only on this unique state. Locking
/// is required only when the callback function needs to access state
/// that is shared across inference requests (for example, a common
/// allocation pool).
///
/// \param allocator Returns the new response allocator object.
/// \param alloc_fn The function to call to allocate buffers for result
/// tensors.
/// \param release_fn The function to call when the server no longer
/// holds a reference to an allocated buffer.
/// \param start_fn The function to call to indicate that the
/// subsequent 'alloc_fn' calls are for a new response. This callback
/// is optional (use nullptr to indicate that it should not be
/// invoked).
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorNew(
    TRITONSERVER_ResponseAllocator** allocator,
    TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn,
    TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn,
    TRITONSERVER_ResponseAllocatorStartFn_t start_fn);
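/// For illustration only (not part of this header), a minimal CPU-only
/// allocator pairs a malloc-based alloc_fn with a free-based release_fn;
/// 'ResponseAlloc' and 'ResponseRelease' are hypothetical client functions
/// and error handling is omitted:
///
///   TRITONSERVER_Error* ResponseAlloc(
///       TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
///       size_t byte_size, TRITONSERVER_MemoryType memory_type,
///       int64_t memory_type_id, void* userp, void** buffer,
///       void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
///       int64_t* actual_memory_type_id)
///   {
///     *buffer = (byte_size > 0) ? malloc(byte_size) : nullptr;
///     *buffer_userp = nullptr;
///     *actual_memory_type = TRITONSERVER_MEMORY_CPU;
///     *actual_memory_type_id = 0;
///     return nullptr;  // success
///   }
///
///   TRITONSERVER_Error* ResponseRelease(
///       TRITONSERVER_ResponseAllocator* allocator, void* buffer,
///       void* buffer_userp, size_t byte_size,
///       TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
///   {
///     free(buffer);
///     return nullptr;  // success
///   }
///
///   TRITONSERVER_ResponseAllocator* allocator;
///   TRITONSERVER_ResponseAllocatorNew(
///       &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */);
///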
/// Set the buffer attributes function for a response allocator object.
/// The function will be called after alloc_fn to set the buffer attributes
/// associated with the output buffer.
///
/// The thread-safety requirement for buffer_attributes_fn is the same as for the other
/// allocator callbacks.
///
/// \param allocator The response allocator object.
/// \param buffer_attributes_fn The function to call to get the buffer
/// attributes information for an allocated buffer.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(
    TRITONSERVER_ResponseAllocator* allocator,
    TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn);
/// Set the query function for a response allocator object. The function is
/// typically called before alloc_fn to determine the allocator's preferred
/// memory type and memory type ID in the current situation, so that a
/// different execution decision can be made.
///
/// The thread-safety requirements for query_fn are the same as for the other
/// allocator callbacks.
///
/// \param allocator The response allocator object.
/// \param query_fn The function to call to query allocator's preferred memory
/// type and memory type ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorSetQueryFunction(
    TRITONSERVER_ResponseAllocator* allocator,
    TRITONSERVER_ResponseAllocatorQueryFn_t query_fn);
/// Delete a response allocator.
///
/// \param allocator The response allocator object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorDelete(
    TRITONSERVER_ResponseAllocator* allocator);
/// TRITONSERVER_Message
///
/// Object representing a Triton Server message.
///
/// Create a new message object from serialized JSON string.
///
/// \param message The message object.
/// \param base The base of the serialized JSON.
/// \param byte_size The size, in bytes, of the serialized message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MessageNewFromSerializedJson(
    TRITONSERVER_Message** message, const char* base, size_t byte_size);
/// Delete a message object.
///
/// \param message The message object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageDelete(
    TRITONSERVER_Message* message);
/// Get the base and size of the buffer containing the serialized
/// message in JSON format. The buffer is owned by the
/// TRITONSERVER_Message object and should not be modified or freed by
/// the caller. The lifetime of the buffer extends only as long as
/// 'message' and must not be accessed once 'message' is deleted.
///
/// \param message The message object.
/// \param base Returns the base of the serialized message.
/// \param byte_size Returns the size, in bytes, of the serialized
/// message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageSerializeToJson(
    TRITONSERVER_Message* message, const char** base, size_t* byte_size);
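/// A brief sketch (illustrative only) of creating a message from a JSON
/// string and reading back its serialized form; 'json' is a hypothetical
/// caller-provided, null-terminated JSON document:
///
///   const char* json = "{\"key\":\"value\"}";
///   TRITONSERVER_Message* message = nullptr;
///   TRITONSERVER_Error* err = TRITONSERVER_MessageNewFromSerializedJson(
///       &message, json, strlen(json));
///   if (err == nullptr) {
///     const char* base = nullptr;
///     size_t byte_size = 0;
///     TRITONSERVER_MessageSerializeToJson(message, &base, &byte_size);
///     // 'base'/'byte_size' are valid only until 'message' is deleted.
///     TRITONSERVER_MessageDelete(message);
///   }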
/// TRITONSERVER_Metrics
///
/// Object representing metrics.
///
/// Metric format types
typedef enum tritonserver_metricformat_enum {
  TRITONSERVER_METRIC_PROMETHEUS
} TRITONSERVER_MetricFormat;
/// Delete a metrics object.
///
/// \param metrics The metrics object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsDelete(
    TRITONSERVER_Metrics* metrics);
/// Get a buffer containing the metrics in the specified format. For
/// each format the buffer contains the following:
///
/// TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline
/// string (char*) that gives a text representation of the metrics in
/// prometheus format. 'byte_size' returns the length of the string
/// in bytes.
///
/// The buffer is owned by the 'metrics' object and should not be
/// modified or freed by the caller. The lifetime of the buffer
/// extends only as long as 'metrics' and must not be accessed once
/// 'metrics' is deleted.
///
/// \param metrics The metrics object.
/// \param format The format to use for the returned metrics.
/// \param base Returns a pointer to the base of the formatted
/// metrics, as described above.
/// \param byte_size Returns the size, in bytes, of the formatted
/// metrics.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsFormatted(
    TRITONSERVER_Metrics* metrics, TRITONSERVER_MetricFormat format,
    const char** base, size_t* byte_size);
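/// A short sketch (illustrative only), assuming 'metrics' is a
/// TRITONSERVER_Metrics object previously obtained from the server:
///
///   const char* base = nullptr;
///   size_t byte_size = 0;
///   TRITONSERVER_Error* err = TRITONSERVER_MetricsFormatted(
///       metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size);
///   if (err == nullptr) {
///     // 'base' holds prometheus-format text of length 'byte_size'; it is
///     // owned by 'metrics' and becomes invalid once 'metrics' is deleted.
///   }
///   TRITONSERVER_MetricsDelete(metrics);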
/// TRITONSERVER_InferenceTrace
///
/// Object that represents tracing for an inference request.
///
/// Trace levels. The trace level controls the type of trace
/// activities that are reported for an inference request.
///
/// Trace level values are power-of-2 and can be combined to trace
/// multiple types of activities. For example, use
/// (TRITONSERVER_TRACE_LEVEL_TIMESTAMPS |
/// TRITONSERVER_TRACE_LEVEL_TENSORS) to trace both timestamps and
/// tensors for an inference request.
///
/// TRITONSERVER_TRACE_LEVEL_MIN and TRITONSERVER_TRACE_LEVEL_MAX are
/// deprecated and should not be used.
typedef enum tritonserver_tracelevel_enum {
  /// Tracing disabled. No trace activities are reported.
  TRITONSERVER_TRACE_LEVEL_DISABLED = 0,
  /// Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
  TRITONSERVER_TRACE_LEVEL_MIN = 1,
  /// Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
  TRITONSERVER_TRACE_LEVEL_MAX = 2,
  /// Record timestamps for the inference request.
  TRITONSERVER_TRACE_LEVEL_TIMESTAMPS = 0x4,
  /// Record input and output tensor values for the inference request.
  TRITONSERVER_TRACE_LEVEL_TENSORS = 0x8
} TRITONSERVER_InferenceTraceLevel;
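/// For example (illustrative only), a level that traces both timestamps and
/// tensor values can be formed by combining the flags:
///
///   TRITONSERVER_InferenceTraceLevel level =
///       static_cast<TRITONSERVER_InferenceTraceLevel>(
///           TRITONSERVER_TRACE_LEVEL_TIMESTAMPS |
///           TRITONSERVER_TRACE_LEVEL_TENSORS);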
/// Get the string representation of a trace level. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param level The trace level.
/// \return The string representation of the trace level.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_InferenceTraceLevelString(
    TRITONSERVER_InferenceTraceLevel level);
/// Trace activities
typedef enum tritonserver_traceactivity_enum {
  TRITONSERVER_TRACE_REQUEST_START = 0,
  TRITONSERVER_TRACE_QUEUE_START = 1,
  TRITONSERVER_TRACE_COMPUTE_START = 2,
  TRITONSERVER_TRACE_COMPUTE_INPUT_END = 3,
  TRITONSERVER_TRACE_COMPUTE_OUTPUT_START = 4,
  TRITONSERVER_TRACE_COMPUTE_END = 5,
  TRITONSERVER_TRACE_REQUEST_END = 6,
  TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT = 7,
  TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT = 8,
  TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9
} TRITONSERVER_InferenceTraceActivity;
/// Get the string representation of a trace activity. The returned
/// string is not owned by the caller and so should not be modified or
/// freed.
///
/// \param activity The trace activity.
/// \return The string representation of the trace activity.
TRITONSERVER_DECLSPEC const char* TRITONSERVER_InferenceTraceActivityString(
    TRITONSERVER_InferenceTraceActivity activity);
/// Type for trace timeline activity callback function. This callback function
/// is used to report activity occurring for a trace. This function
/// does not take ownership of 'trace' and so any information needed
/// from that object must be copied before returning. The 'userp' data
/// is the same as what is supplied in the call to
/// TRITONSERVER_InferenceTraceNew.
typedef void (*TRITONSERVER_InferenceTraceActivityFn_t)(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    void* userp);
/// Type for trace tensor activity callback function. This callback function
/// is used to report tensor activity occurring for a trace. This function
/// does not take ownership of 'trace' and so any information needed
/// from that object must be copied before returning. The 'userp' data
/// is the same as what is supplied in the call to
/// TRITONSERVER_InferenceTraceTensorNew.
typedef void (*TRITONSERVER_InferenceTraceTensorActivityFn_t)(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, const char* name,
    TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
    const int64_t* shape, uint64_t dim_count,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void* userp);
/// Type for trace release callback function. This callback function
/// is called when all activity for the trace has completed. The
/// callback function takes ownership of the
/// TRITONSERVER_InferenceTrace object. The 'userp' data is the same
/// as what is supplied in the call to TRITONSERVER_InferenceTraceNew.
typedef void (*TRITONSERVER_InferenceTraceReleaseFn_t)(
    TRITONSERVER_InferenceTrace* trace, void* userp);
/// Create a new inference trace object. The caller takes ownership of
/// the TRITONSERVER_InferenceTrace object and must call
/// TRITONSERVER_InferenceTraceDelete to release the object.
///
/// The activity callback function will be called to report activity
/// for 'trace' as well as for any child traces that are spawned by
/// 'trace', and so the activity callback must check the trace object
/// to determine specifically what activity is being reported.
///
/// The release callback is called for both 'trace' and for any child
/// traces spawned by 'trace'.
///
/// \param trace Returns the new inference trace object.
/// \param level The tracing level.
/// \param parent_id The parent trace id for this trace. A value of 0
/// indicates that there is no parent trace.
/// \param activity_fn The callback function where activity for the
/// trace is reported.
/// \param release_fn The callback function called when all activity
/// is complete for the trace.
/// \param trace_userp User-provided pointer that is delivered to
/// the activity and release callback functions.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceNew(
    TRITONSERVER_InferenceTrace** trace,
    TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id,
    TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp);
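/// A minimal sketch (illustrative only), assuming caller-defined callbacks
/// 'MyTraceActivityFn' and 'MyTraceReleaseFn' matching the typedefs above:
///
///   TRITONSERVER_InferenceTrace* trace = nullptr;
///   TRITONSERVER_Error* err = TRITONSERVER_InferenceTraceNew(
///       &trace, TRITONSERVER_TRACE_LEVEL_TIMESTAMPS, 0 /* parent_id */,
///       MyTraceActivityFn, MyTraceReleaseFn, nullptr /* trace_userp */);
///   // On success the trace can be passed to an inference call; Triton
///   // invokes 'MyTraceReleaseFn' when all activity for the trace completes.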
/// Create a new inference trace object. The caller takes ownership of
/// the TRITONSERVER_InferenceTrace object and must call
/// TRITONSERVER_InferenceTraceDelete to release the object.
///
/// The timeline and tensor activity callback function will be called to report
/// activity for 'trace' as well as for any child traces that are spawned by
/// 'trace', and so the activity callback must check the trace object
/// to determine specifically what activity is being reported.
///
/// The release callback is called for both 'trace' and for any child
/// traces spawned by 'trace'.
///
/// \param trace Returns the new inference trace object.
/// \param level The tracing level.
/// \param parent_id The parent trace id for this trace. A value of 0
/// indicates that there is no parent trace.
/// \param activity_fn The callback function where timeline activity for the
/// trace is reported.
/// \param tensor_activity_fn The callback function where tensor activity for
/// the trace is reported.
/// \param release_fn The callback function called when all activity
/// is complete for the trace.
/// \param trace_userp User-provided pointer that is delivered to
/// the activity and release callback functions.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceTensorNew(
    TRITONSERVER_InferenceTrace** trace,
    TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id,
    TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
    TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp);
/// Delete a trace object.
///
/// \param trace The trace object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceDelete(
    TRITONSERVER_InferenceTrace* trace);
/// Get the id associated with a trace. Every trace is assigned an id
/// that is unique across all traces created for a Triton server.
///
/// \param trace The trace.
/// \param id Returns the id associated with the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceId(
    TRITONSERVER_InferenceTrace* trace, uint64_t* id);
/// Get the parent id associated with a trace. The parent id indicates
/// a parent-child relationship between two traces. A parent id value
/// of 0 indicates that there is no parent trace.
///
/// \param trace The trace.
/// \param id Returns the parent id associated with the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceParentId(
    TRITONSERVER_InferenceTrace* trace, uint64_t* parent_id);
/// Get the name of the model associated with a trace. The caller does
/// not own the returned string and must not modify or delete it. The
/// lifetime of the returned string extends only as long as 'trace'.
///
/// \param trace The trace.
/// \param model_name Returns the name of the model associated with
/// the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelName(
    TRITONSERVER_InferenceTrace* trace, const char** model_name);
/// Get the version of the model associated with a trace.
///
/// \param trace The trace.
/// \param model_version Returns the version of the model associated
/// with the trace.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceModelVersion(
    TRITONSERVER_InferenceTrace* trace, int64_t* model_version);
/// TRITONSERVER_InferenceRequest
///
/// Object representing an inference request. The inference request
/// provides the meta-data and input tensor values needed for an
/// inference and returns the inference result meta-data and output
/// tensors. An inference request object can be modified and reused
/// multiple times.
///
/// Inference request flags. The enum values must be power-of-2 values.
typedef enum tritonserver_requestflag_enum {
  TRITONSERVER_REQUEST_FLAG_SEQUENCE_START = 1,
  TRITONSERVER_REQUEST_FLAG_SEQUENCE_END = 2
} TRITONSERVER_RequestFlag;
/// Inference request release flags. The enum values must be
/// power-of-2 values.
typedef enum tritonserver_requestreleaseflag_enum {
  TRITONSERVER_REQUEST_RELEASE_ALL = 1
} TRITONSERVER_RequestReleaseFlag;
/// Inference response complete flags. The enum values must be
/// power-of-2 values.
typedef enum tritonserver_responsecompleteflag_enum {
  TRITONSERVER_RESPONSE_COMPLETE_FINAL = 1
} TRITONSERVER_ResponseCompleteFlag;
/// Type for inference request release callback function. The callback
/// indicates what type of release is being performed on the request
/// and for some of these the callback function takes ownership of the
/// TRITONSERVER_InferenceRequest object. The 'userp' data is the data
/// provided as 'request_release_userp' in the call to
/// TRITONSERVER_InferenceRequestSetReleaseCallback.
///
/// One or more flags will be specified when the callback is invoked,
/// and the callback must take the following actions:
///
/// - TRITONSERVER_REQUEST_RELEASE_ALL: The entire inference request
/// is being released and ownership is passed to the callback
/// function. Triton will no longer access the 'request' object
/// itself nor any input tensor data associated with the
/// request. The callback should free or otherwise manage the
/// 'request' object and all associated tensor data.
///
/// Note that currently TRITONSERVER_REQUEST_RELEASE_ALL should always
/// be set when the callback is invoked but in the future that may
/// change, so the callback should explicitly check for the flag
/// before taking ownership of the request object.
///
typedef void (*TRITONSERVER_InferenceRequestReleaseFn_t)(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp);
/// Type for callback function indicating that an inference response
/// has completed. The callback function takes ownership of the
/// TRITONSERVER_InferenceResponse object. The 'userp' data is the
/// data provided as 'response_userp' in the call to
/// TRITONSERVER_InferenceRequestSetResponseCallback.
///
/// One or more flags may be specified when the callback is invoked:
///
/// - TRITONSERVER_RESPONSE_COMPLETE_FINAL: Indicates that no more
/// responses will be generated for a given request (more
/// specifically, that no more responses will be generated for the
/// inference request that set this callback and 'userp'). When
/// this flag is set 'response' may be a response object or may be
/// nullptr. If 'response' is not nullptr, then 'response' is the
/// last response that Triton will produce for the request. If
/// 'response' is nullptr then Triton is indicating that no more
/// responses will be produced for the request.
typedef void (*TRITONSERVER_InferenceResponseCompleteFn_t)(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags,
    void* userp);
/// Create a new inference request object.
///
/// \param inference_request Returns the new request object.
/// \param server the inference server object.
/// \param model_name The name of the model to use for the request.
/// \param model_version The version of the model to use for the
/// request. If -1 then the server will choose a version based on the
/// model's policy.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestNew(
    TRITONSERVER_InferenceRequest** inference_request,
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version);
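/// A minimal sketch (illustrative only), assuming 'server' is a live
/// TRITONSERVER_Server; the model name is a placeholder and -1 lets the
/// server choose the version according to the model's policy:
///
///   TRITONSERVER_InferenceRequest* request = nullptr;
///   TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestNew(
///       &request, server, "my_model", -1 /* model_version */);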
/// Delete an inference request object.
///
/// \param inference_request The request object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestDelete(
    TRITONSERVER_InferenceRequest* inference_request);
/// Get the ID for a request. The returned ID is owned by
/// 'inference_request' and must not be modified or freed by the
/// caller.
///
/// \param inference_request The request object.
/// \param id Returns the ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestId(
    TRITONSERVER_InferenceRequest* inference_request, const char** id);
/// Set the ID for a request.
///
/// \param inference_request The request object.
/// \param id The ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetId(
    TRITONSERVER_InferenceRequest* inference_request, const char* id);
/// Get the flag(s) associated with a request. On return 'flags' holds
/// a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for
/// available flags.
///
/// \param inference_request The request object.
/// \param flags Returns the flags.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestFlags(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t* flags);
/// Set the flag(s) associated with a request. 'flags' should hold a
/// bitwise-or of all flag values, see TRITONSERVER_RequestFlag for
/// available flags.
///
/// \param inference_request The request object.
/// \param flags The flags.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetFlags(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t flags);
/// Get the correlation ID of the inference request as an unsigned integer.
/// Default is 0, which indicates that the request has no correlation ID.
/// If the correlation id associated with the inference request is a string,
/// this function will return a failure. The correlation ID is used
/// to indicate that two or more inference requests are related to each other.
/// How this relationship is handled by the inference server is determined by
/// the model's scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestCorrelationId(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t* correlation_id);
/// Get the correlation ID of the inference request as a string.
/// Default is empty "", which indicates that the request has no correlation ID.
/// If the correlation id associated with the inference request is an unsigned
/// integer, then this function will return a failure. The correlation ID
/// is used to indicate that two or more inference requests are related to
/// each other.
/// How this relationship is handled by the inference server is determined by
/// the model's scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id Returns the correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestCorrelationIdString(
    TRITONSERVER_InferenceRequest* inference_request,
    const char** correlation_id);
/// Set the correlation ID of the inference request to be an unsigned integer.
/// Default is 0, which indicates that the request has no correlation ID.
/// The correlation ID is used to indicate that two or more inference requests
/// are related to each other. How this relationship is handled by the
/// inference server is determined by the model's scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id The correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetCorrelationId(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t correlation_id);
/// Set the correlation ID of the inference request to be a string.
/// The correlation ID is used to indicate that two or more inference
/// requests are related to each other. How this relationship is
/// handled by the inference server is determined by the model's
/// scheduling policy.
///
/// \param inference_request The request object.
/// \param correlation_id The correlation ID.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetCorrelationIdString(
    TRITONSERVER_InferenceRequest* inference_request,
    const char* correlation_id);
/// Get the priority for a request. The default is 0 indicating that
/// the request does not specify a priority and so will use the
/// model's default priority.
///
/// \param inference_request The request object.
/// \param priority Returns the priority level.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestPriority(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t* priority);
/// Set the priority for a request. The default is 0 indicating that
/// the request does not specify a priority and so will use the
/// model's default priority.
///
/// \param inference_request The request object.
/// \param priority The priority level.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetPriority(
    TRITONSERVER_InferenceRequest* inference_request, uint32_t priority);
/// Get the timeout for a request, in microseconds. The default is 0
/// which indicates that the request has no timeout.
///
/// \param inference_request The request object.
/// \param timeout_us Returns the timeout, in microseconds.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestTimeoutMicroseconds(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t* timeout_us);
/// Set the timeout for a request, in microseconds. The default is 0
/// which indicates that the request has no timeout.
///
/// \param inference_request The request object.
/// \param timeout_us The timeout, in microseconds.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(
    TRITONSERVER_InferenceRequest* inference_request, uint64_t timeout_us);
/// Add an input to a request.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param datatype The type of the input. Valid type names are BOOL,
/// UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16,
/// FP32, FP64, and BYTES.
/// \param shape The shape of the input.
/// \param dim_count The number of dimensions of 'shape'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddInput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const TRITONSERVER_DataType datatype, const int64_t* shape,
    uint64_t dim_count);
/// Add a raw input to a request. The name recognized by the model, data type
/// and shape of the input will be deduced from model configuration.
/// This function must be called at most once on a request, with no other
/// inputs added, to ensure the deduction is accurate.
///
/// \param inference_request The request object.
/// \param name The name of the input. This name is only used to reference
/// the raw input in other Triton Server APIs. It is not associated with the
/// name used in the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAddRawInput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove an input from a request.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveInput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove all inputs from a request.
///
/// \param inference_request The request object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveAllInputs(
    TRITONSERVER_InferenceRequest* inference_request);
/// Assign a buffer of data to an input. The buffer will be appended
/// to any existing buffers for that input. The 'inference_request'
/// object takes ownership of the buffer and so the caller should not
/// modify or free the buffer until that ownership is released by
/// 'inference_request' being deleted or by the input being removed
/// from 'inference_request'.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param base The base address of the input data.
/// \param byte_size The size, in bytes, of the input data.
/// \param memory_type The memory type of the input data.
/// \param memory_type_id The memory type id of the input data.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAppendInputData(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id);
/// Assign a buffer of data to an input for execution on all model instances
/// with the specified host policy. The buffer will be appended to any existing
/// buffers for that input on all devices with this host policy. The
/// 'inference_request' object takes ownership of the buffer and so the caller
/// should not modify or free the buffer until that ownership is released by
/// 'inference_request' being deleted or by the input being removed from
/// 'inference_request'. If execution is scheduled on a device that does not
/// have an input buffer specified using this function, then the input buffer
/// specified with TRITONSERVER_InferenceRequestAppendInputData will be used,
/// so a non-host-policy-specific version of the data must be added using that
/// API.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param base The base address of the input data.
/// \param byte_size The size, in bytes, of the input data.
/// \param memory_type The memory type of the input data.
/// \param memory_type_id The memory type id of the input data.
/// \param host_policy_name All model instances executing with this host_policy
/// will use this input buffer for execution.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, const char* host_policy_name);
/// Assign a buffer of data to an input. The buffer will be appended
/// to any existing buffers for that input. The 'inference_request'
/// object takes ownership of the buffer and so the caller should not
/// modify or free the buffer until that ownership is released by
/// 'inference_request' being deleted or by the input being removed
/// from 'inference_request'.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \param base The base address of the input data.
/// \param buffer_attributes The buffer attributes of the input.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes(
    TRITONSERVER_InferenceRequest* inference_request, const char* name,
    const void* base, TRITONSERVER_BufferAttributes* buffer_attributes);
/// Clear all input data from an input, releasing ownership of the
/// buffer(s) that were appended to the input with
/// TRITONSERVER_InferenceRequestAppendInputData or
/// TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy.
///
/// \param inference_request The request object.
/// \param name The name of the input.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveAllInputData(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Add an output request to an inference request.
///
/// \param inference_request The request object.
/// \param name The name of the output.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestAddRequestedOutput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove an output request from an inference request.
///
/// \param inference_request The request object.
/// \param name The name of the output.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveRequestedOutput(
    TRITONSERVER_InferenceRequest* inference_request, const char* name);
/// Remove all output requests from an inference request.
///
/// \param inference_request The request object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs(
    TRITONSERVER_InferenceRequest* inference_request);
/// Set the release callback for an inference request. The release
/// callback is called by Triton to return ownership of the request
/// object.
///
/// \param inference_request The request object.
/// \param request_release_fn The function called to return ownership
/// of the 'inference_request' object.
/// \param request_release_userp User-provided pointer that is
/// delivered to the 'request_release_fn' callback.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetReleaseCallback(
    TRITONSERVER_InferenceRequest* inference_request,
    TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn,
    void* request_release_userp);
/// Set the allocator and response callback for an inference
/// request. The allocator is used to allocate buffers for any output
/// tensors included in responses that are produced for this
/// request. The response callback is called to return response
/// objects representing responses produced for this request.
///
/// \param inference_request The request object.
/// \param response_allocator The TRITONSERVER_ResponseAllocator to use
/// to allocate buffers to hold inference results.
/// \param response_allocator_userp User-provided pointer that is
/// delivered to the response allocator's start and allocation functions.
/// \param response_fn The function called to deliver an inference
/// response for this request.
/// \param response_userp User-provided pointer that is delivered to
/// the 'response_fn' callback.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceRequestSetResponseCallback(
    TRITONSERVER_InferenceRequest* inference_request,
    TRITONSERVER_ResponseAllocator* response_allocator,
    void* response_allocator_userp,
    TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
    void* response_userp);
/// TRITONSERVER_InferenceResponse
///
/// Object representing an inference response. The inference response
/// provides the meta-data and output tensor values calculated by the
/// inference.
///
/// Delete an inference response object.
///
/// \param inference_response The response object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseDelete(
    TRITONSERVER_InferenceResponse* inference_response);
/// Return the error status of an inference response. Return a
/// TRITONSERVER_Error object on failure, return nullptr on success.
/// The returned error object is owned by 'inference_response' and so
/// should not be deleted by the caller.
///
/// \param inference_response The response object.
/// \return a TRITONSERVER_Error indicating the success or failure
/// status of the response.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseError(
    TRITONSERVER_InferenceResponse* inference_response);
/// Get model used to produce a response. The caller does not own the
/// returned model name value and must not modify or delete it. The
/// lifetime of all returned values extends until 'inference_response'
/// is deleted.
///
/// \param inference_response The response object.
/// \param model_name Returns the name of the model.
/// \param model_version Returns the version of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseModel(
    TRITONSERVER_InferenceResponse* inference_response,
    const char** model_name, int64_t* model_version);
/// Get the ID of the request corresponding to a response. The caller
/// does not own the returned ID and must not modify or delete it. The
/// lifetime of all returned values extends until 'inference_response'
/// is deleted.
///
/// \param inference_response The response object.
/// \param request_id Returns the ID of the request corresponding to
/// this response.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseId(
    TRITONSERVER_InferenceResponse* inference_response,
    const char** request_id);
/// Get the number of parameters available in the response.
///
/// \param inference_response The response object.
/// \param count Returns the number of parameters.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseParameterCount(
    TRITONSERVER_InferenceResponse* inference_response, uint32_t* count);
/// Get all information about a parameter. The caller does not own any
/// of the returned values and must not modify or delete them. The
/// lifetime of all returned values extends until 'inference_response'
/// is deleted.
///
/// The 'vvalue' returns a void* pointer that must be cast
/// appropriately based on 'type'. For example:
///
/// void* vvalue;
/// TRITONSERVER_ParameterType type;
/// TRITONSERVER_InferenceResponseParameter(
/// response, index, &name, &type, &vvalue);
/// switch (type) {
/// case TRITONSERVER_PARAMETER_BOOL:
/// bool value = *(reinterpret_cast<bool*>(vvalue));
/// ...
/// case TRITONSERVER_PARAMETER_INT:
/// int64_t value = *(reinterpret_cast<int64_t*>(vvalue));
/// ...
/// case TRITONSERVER_PARAMETER_STRING:
/// const char* value = reinterpret_cast<const char*>(vvalue);
/// ...
///
/// \param inference_response The response object.
/// \param index The index of the parameter, must be 0 <= index <
/// count, where 'count' is the value returned by
/// TRITONSERVER_InferenceResponseParameterCount.
/// \param name Returns the name of the parameter.
/// \param type Returns the type of the parameter.
/// \param vvalue Returns a pointer to the parameter value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseParameter(
    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
    const char** name, TRITONSERVER_ParameterType* type, const void** vvalue);
/// Get the number of outputs available in the response.
///
/// \param inference_response The response object.
/// \param count Returns the number of output tensors.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseOutputCount(
    TRITONSERVER_InferenceResponse* inference_response, uint32_t* count);
/// Get all information about an output tensor. The tensor data is
/// returned as the base pointer to the data and the size, in bytes,
/// of the data. The caller does not own any of the returned values
/// and must not modify or delete them. The lifetime of all returned
/// values extends until 'inference_response' is deleted.
///
/// \param inference_response The response object.
/// \param index The index of the output tensor, must be 0 <= index <
/// count, where 'count' is the value returned by
/// TRITONSERVER_InferenceResponseOutputCount.
/// \param name Returns the name of the output.
/// \param datatype Returns the type of the output.
/// \param shape Returns the shape of the output.
/// \param dim_count Returns the number of dimensions of the returned
/// shape.
/// \param base Returns the tensor data for the output.
/// \param byte_size Returns the size, in bytes, of the data.
/// \param memory_type Returns the memory type of the data.
/// \param memory_type_id Returns the memory type id of the data.
/// \param userp The user-specified value associated with the buffer
/// in TRITONSERVER_ResponseAllocatorAllocFn_t.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutput(
    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
    const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint64_t* dim_count, const void** base, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
    void** userp);
/// Get a classification label associated with an output for a given
/// index. The caller does not own the returned label and must not
/// modify or delete it. The lifetime of the returned label extends
/// until 'inference_response' is deleted.
///
/// \param inference_response The response object.
/// \param index The index of the output tensor, must be 0 <= index <
/// count, where 'count' is the value returned by
/// TRITONSERVER_InferenceResponseOutputCount.
/// \param class_index The index of the class.
/// \param label Returns the label corresponding to 'class_index' or
/// nullptr if no label.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceResponseOutputClassificationLabel(
    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
    const size_t class_index, const char** label);
/// TRITONSERVER_BufferAttributes
///
/// API to create, modify, or retrieve attributes associated with a buffer.
///
/// Create a new buffer attributes object. The caller takes ownership of
/// the TRITONSERVER_BufferAttributes object and must call
/// TRITONSERVER_BufferAttributesDelete to release the object.
///
/// \param buffer_attributes Returns the new buffer attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesNew(
    TRITONSERVER_BufferAttributes** buffer_attributes);
/// Delete a buffer attributes object.
///
/// \param buffer_attributes The buffer_attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesDelete(
    TRITONSERVER_BufferAttributes* buffer_attributes);
/// Set the memory type id field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type_id Memory type id to assign to the buffer attributes
/// object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetMemoryTypeId(
    TRITONSERVER_BufferAttributes* buffer_attributes, int64_t memory_type_id);
/// Set the memory type field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type Memory type to assign to the buffer attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetMemoryType(
    TRITONSERVER_BufferAttributes* buffer_attributes,
    TRITONSERVER_MemoryType memory_type);
/// Set the CudaIpcHandle field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param cuda_ipc_handle The CudaIpcHandle to assign to the buffer attributes
/// object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetCudaIpcHandle(
    TRITONSERVER_BufferAttributes* buffer_attributes, void* cuda_ipc_handle);
/// Set the byte size field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param byte_size Byte size to assign to the buffer attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesSetByteSize(
    TRITONSERVER_BufferAttributes* buffer_attributes, size_t byte_size);
/// Get the memory type id field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type_id Returns the memory type id associated with the buffer
/// attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesMemoryTypeId(
    TRITONSERVER_BufferAttributes* buffer_attributes, int64_t* memory_type_id);
/// Get the memory type field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param memory_type Returns the memory type associated with the buffer
/// attributes object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesMemoryType(
    TRITONSERVER_BufferAttributes* buffer_attributes,
    TRITONSERVER_MemoryType* memory_type);
/// Get the CudaIpcHandle field of the buffer attributes object.
///
/// \param buffer_attributes The buffer attributes object.
/// \param cuda_ipc_handle Returns the CUDA IPC handle associated with the
/// buffer attributes object. If the CUDA IPC handle does not exist for the
/// buffer, nullptr will be returned.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_BufferAttributesCudaIpcHandle(
    TRITONSERVER_BufferAttributes* buffer_attributes, void** cuda_ipc_handle);
/// Get the byte size field of the buffer attributes.
///
/// \param buffer_attributes The buffer attributes object.
/// \param byte_size Returns the byte size associated with the buffer attributes
/// object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesByteSize(
    TRITONSERVER_BufferAttributes* buffer_attributes, size_t* byte_size);
/// TRITONSERVER_ServerOptions
///
/// Options to use when creating an inference server.
///
/// Model control modes
typedef enum tritonserver_modelcontrolmode_enum {
  TRITONSERVER_MODEL_CONTROL_NONE,
  TRITONSERVER_MODEL_CONTROL_POLL,
  TRITONSERVER_MODEL_CONTROL_EXPLICIT
} TRITONSERVER_ModelControlMode;
/// Rate limit modes
typedef enum tritonserver_ratelimitmode_enum {
  TRITONSERVER_RATE_LIMIT_OFF,
  TRITONSERVER_RATE_LIMIT_EXEC_COUNT
} TRITONSERVER_RateLimitMode;
/// Create a new server options object. The caller takes ownership of
/// the TRITONSERVER_ServerOptions object and must call
/// TRITONSERVER_ServerOptionsDelete to release the object.
///
/// \param options Returns the new server options object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsNew(
    TRITONSERVER_ServerOptions** options);
/// Delete a server options object.
///
/// \param options The server options object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsDelete(
    TRITONSERVER_ServerOptions* options);
/// Set the textual ID for the server in a server options. The ID is a
/// name that identifies the server.
///
/// \param options The server options object.
/// \param server_id The server identifier.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetServerId(
    TRITONSERVER_ServerOptions* options, const char* server_id);
/// Set the model repository path in a server options. The path must be
/// the full absolute path to the model repository. This function can be called
/// multiple times with different paths to set multiple model repositories.
/// Note that if a model is not unique across all model repositories
/// at any time, the model will not be available.
///
/// \param options The server options object.
/// \param model_repository_path The full path to the model repository.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelRepositoryPath(
    TRITONSERVER_ServerOptions* options, const char* model_repository_path);
/// Set the model control mode in a server options. For each mode the models
/// will be managed as the following:
///
/// TRITONSERVER_MODEL_CONTROL_NONE: the models in model repository will be
/// loaded on startup. After startup any changes to the model repository will
/// be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in
/// an error.
///
/// TRITONSERVER_MODEL_CONTROL_POLL: the models in model repository will be
/// loaded on startup. The model repository can be polled periodically using
/// TRITONSERVER_ServerPollModelRepository and the server will load, unload,
/// and update models according to changes in the model repository.
///
/// TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in model repository will
/// not be loaded on startup. The corresponding model control APIs must be
/// called to load / unload a model in the model repository.
///
/// \param options The server options object.
/// \param mode The mode to use for the model control.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelControlMode(
    TRITONSERVER_ServerOptions* options, TRITONSERVER_ModelControlMode mode);
/// Set the model to be loaded at startup in a server options. The model must be
/// present in one, and only one, of the specified model repositories.
/// This function can be called multiple times with different model names
/// to set multiple startup models.
/// Note that it only takes effect in TRITONSERVER_MODEL_CONTROL_EXPLICIT mode.
///
/// \param options The server options object.
/// \param model_name The name of the model to load on startup.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetStartupModel(
    TRITONSERVER_ServerOptions* options, const char* model_name);
/// Enable or disable strict model configuration handling in a server
/// options.
///
/// \param options The server options object.
/// \param strict True to enable strict model configuration handling,
/// false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetStrictModelConfig(
    TRITONSERVER_ServerOptions* options, bool strict);
/// Set the rate limit mode in a server options.
///
/// TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the
/// inference execution using the number of times each instance has got a
/// chance to run. The execution gets to run only when its resource
/// constraints are satisfied.
///
/// TRITONSERVER_RATE_LIMIT_OFF: The rate limiting is turned off and the
/// inference gets executed whenever an instance is available.
///
/// \param options The server options object.
/// \param mode The mode to use for the rate limiting. By default, execution
/// count is used to determine the priorities.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetRateLimiterMode(
    TRITONSERVER_ServerOptions* options, TRITONSERVER_RateLimitMode mode);
/// Add resource count for rate limiting.
///
/// \param options The server options object.
/// \param resource_name The name of the resource.
/// \param resource_count The count of the resource.
/// \param device The device identifier for the resource. A value of -1
/// indicates that the specified number of resources are available on every
/// device. The device value is ignored for a global resource. The server
/// will use the rate limiter configuration specified for instance groups
/// in model config to determine whether resource is global. In case of
/// conflicting resource type in different model configurations, server
/// will raise an appropriate error while loading model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsAddRateLimiterResource(
    TRITONSERVER_ServerOptions* options, const char* resource_name,
    const size_t resource_count, const int device);
/// Set the total pinned memory byte size that the server can allocate
/// in a server options. The pinned memory pool will be shared across
/// Triton itself and the backends that use
/// TRITONBACKEND_MemoryManager to allocate memory.
///
/// \param options The server options object.
/// \param size The pinned memory pool byte size.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(
    TRITONSERVER_ServerOptions* options, uint64_t size);
/// Set the total CUDA memory byte size that the server can allocate
/// on a given GPU device in a server options. The CUDA memory pool
/// will be shared across Triton itself and the backends that use
/// TRITONBACKEND_MemoryManager to allocate memory.
///
/// \param options The server options object.
/// \param gpu_device The GPU device to allocate the memory pool.
/// \param size The CUDA memory pool byte size.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize(
    TRITONSERVER_ServerOptions* options, int gpu_device, uint64_t size);
/// Set the total response cache byte size that the server can allocate in CPU
/// memory. The response cache will be shared across all inference requests and
/// across all models.
///
/// \param options The server options object.
/// \param size The total response cache byte size.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetResponseCacheByteSize(
    TRITONSERVER_ServerOptions* options, uint64_t size);
/// Set the minimum supported CUDA compute capability in a server
/// options.
///
/// \param options The server options object.
/// \param cc The minimum CUDA compute capability.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
    TRITONSERVER_ServerOptions* options, double cc);
/// Enable or disable exit-on-error in a server options.
///
/// \param options The server options object.
/// \param exit True to enable exiting on initialization error, false
/// to continue.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetExitOnError(
    TRITONSERVER_ServerOptions* options, bool exit);
/// Enable or disable strict readiness handling in a server options.
///
/// \param options The server options object.
/// \param strict True to enable strict readiness handling, false to
/// disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetStrictReadiness(
    TRITONSERVER_ServerOptions* options, bool strict);
/// Set the exit timeout, in seconds, for the server in a server
/// options.
///
/// \param options The server options object.
/// \param timeout The exit timeout, in seconds.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetExitTimeout(
    TRITONSERVER_ServerOptions* options, unsigned int timeout);
/// Set the number of threads used in the buffer manager in a server options.
///
/// \param options The server options object.
/// \param thread_count The number of threads.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetBufferManagerThreadCount(
    TRITONSERVER_ServerOptions* options, unsigned int thread_count);
/// Set the number of threads to concurrently load models in a server options.
///
/// \param options The server options object.
/// \param thread_count The number of threads.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelLoadThreadCount(
    TRITONSERVER_ServerOptions* options, unsigned int thread_count);
/// Provide a log output file.
///
/// \param options The server options object.
/// \param file A string defining the file where the log output will be saved.
/// An empty string for the file name will cause Triton to direct logging
/// output to the console.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFile(
    TRITONSERVER_ServerOptions* options, const char* file);
/// Enable or disable info level logging.
///
/// \param options The server options object.
/// \param log True to enable info logging, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogInfo(
    TRITONSERVER_ServerOptions* options, bool log);
/// Enable or disable warning level logging.
///
/// \param options The server options object.
/// \param log True to enable warning logging, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogWarn(
    TRITONSERVER_ServerOptions* options, bool log);
/// Enable or disable error level logging.
///
/// \param options The server options object.
/// \param log True to enable error logging, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetLogError(
    TRITONSERVER_ServerOptions* options, bool log);
/// Set the logging format.
///
/// \param options The server options object.
/// \param format The logging format.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetLogFormat(
    TRITONSERVER_ServerOptions* options, const TRITONSERVER_LogFormat format);
/// Set verbose logging level. Level zero disables verbose logging.
///
/// \param options The server options object.
/// \param level The verbose logging level.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetLogVerbose(
    TRITONSERVER_ServerOptions* options, int level);
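A minimal sketch of how the logging options above are typically combined when building server options. TRITONSERVER_ServerOptionsNew is assumed to be declared earlier in this header, error returns are ignored for brevity, and the log file path is hypothetical.
  // Hedged usage sketch: info/warn/error to a file, verbose level 1.
  TRITONSERVER_ServerOptions* opts = nullptr;
  TRITONSERVER_ServerOptionsNew(&opts);
  TRITONSERVER_ServerOptionsSetLogFile(opts, "/tmp/triton.log");  // hypothetical path
  TRITONSERVER_ServerOptionsSetLogInfo(opts, true);
  TRITONSERVER_ServerOptionsSetLogWarn(opts, true);
  TRITONSERVER_ServerOptionsSetLogError(opts, true);
  TRITONSERVER_ServerOptionsSetLogVerbose(opts, 1);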
/// Enable or disable metrics collection in a server options.
///
/// \param options The server options object.
/// \param metrics True to enable metrics, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetMetrics(
    TRITONSERVER_ServerOptions* options, bool metrics);
/// Enable or disable GPU metrics collection in a server options. GPU
/// metrics are collected if both this option and
/// TRITONSERVER_ServerOptionsSetMetrics are true.
///
/// \param options The server options object.
/// \param gpu_metrics True to enable GPU metrics, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetGpuMetrics(
    TRITONSERVER_ServerOptions* options, bool gpu_metrics);
/// Enable or disable CPU metrics collection in a server options. CPU
/// metrics are collected if both this option and
/// TRITONSERVER_ServerOptionsSetMetrics are true.
///
/// \param options The server options object.
/// \param cpu_metrics True to enable CPU metrics, false to disable.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetCpuMetrics(
    TRITONSERVER_ServerOptions* options, bool cpu_metrics);
/// Set the interval for metrics collection in a server options.
/// This is 2000 milliseconds by default.
///
/// \param options The server options object.
/// \param metrics_interval_ms The time interval in ms between
/// successive metrics updates.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetMetricsInterval(
    TRITONSERVER_ServerOptions* options, uint64_t metrics_interval_ms);
/// Set the directory containing backend shared libraries. This
/// directory is searched last after the version and model directory
/// in the model repository when looking for the backend shared
/// library for a model. If the backend is named 'be' the directory
/// searched is 'backend_dir'/be/libtriton_be.so.
///
/// \param options The server options object.
/// \param backend_dir The full path of the backend directory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetBackendDirectory(
    TRITONSERVER_ServerOptions* options, const char* backend_dir);
/// Set the directory containing repository agent shared libraries. This
/// directory is searched when looking for the repository agent shared
/// library for a model. If the backend is named 'ra' the directory
/// searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so.
///
/// \param options The server options object.
/// \param repoagent_dir The full path of the repository agent directory.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetRepoAgentDirectory(
    TRITONSERVER_ServerOptions* options, const char* repoagent_dir);
/// Specify the limit on memory usage as a fraction on the device identified by
/// 'kind' and 'device_id'. If model loading on the device is requested and the
/// current memory usage exceeds the limit, the load will be rejected. If not
/// specified, the limit will not be set.
///
/// Currently supports TRITONSERVER_INSTANCEGROUPKIND_GPU.
///
/// \param options The server options object.
/// \param kind The kind of the device.
/// \param device_id The id of the device.
/// \param fraction The limit on memory usage as a fraction
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit(
    TRITONSERVER_ServerOptions* options,
    const TRITONSERVER_InstanceGroupKind kind, const int device_id,
    const double fraction);
/// Set a configuration setting for a named backend in a server
/// options.
///
/// \param options The server options object.
/// \param backend_name The name of the backend.
/// \param setting The name of the setting.
/// \param value The setting value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetBackendConfig(
    TRITONSERVER_ServerOptions* options, const char* backend_name,
    const char* setting, const char* value);
/// Set a host policy setting for a given policy name in a server options.
///
/// \param options The server options object.
/// \param policy_name The name of the policy.
/// \param setting The name of the setting.
/// \param value The setting value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerOptionsSetHostPolicy(
    TRITONSERVER_ServerOptions* options, const char* policy_name,
    const char* setting, const char* value);
/// TRITONSERVER_Server
///
/// An inference server.
///
/// Model batch flags. The enum values must be power-of-2 values.
typedef enum tritonserver_batchflag_enum {
  TRITONSERVER_BATCH_UNKNOWN = 1,
  TRITONSERVER_BATCH_FIRST_DIM = 2
} TRITONSERVER_ModelBatchFlag;
/// Model index flags. The enum values must be power-of-2 values.
typedef enum tritonserver_modelindexflag_enum {
  TRITONSERVER_INDEX_FLAG_READY = 1
} TRITONSERVER_ModelIndexFlag;
/// Model transaction policy flags. The enum values must be
/// power-of-2 values.
typedef enum tritonserver_txn_property_flag_enum {
  TRITONSERVER_TXN_ONE_TO_ONE = 1,
  TRITONSERVER_TXN_DECOUPLED = 2
} TRITONSERVER_ModelTxnPropertyFlag;
/// Create a new server object. The caller takes ownership of the
/// TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete
/// to release the object.
///
/// \param server Returns the new inference server object.
/// \param options The inference server options object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerNew(
    TRITONSERVER_Server** server, TRITONSERVER_ServerOptions* options);
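A minimal server lifecycle sketch. TRITONSERVER_ServerOptionsNew, TRITONSERVER_ServerOptionsSetModelRepositoryPath, TRITONSERVER_ServerOptionsDelete and TRITONSERVER_ErrorDelete are assumed to be declared earlier in this header, and the repository path is hypothetical.
  // Hedged usage sketch: create options, create the server, clean up.
  TRITONSERVER_ServerOptions* opts = nullptr;
  TRITONSERVER_ServerOptionsNew(&opts);
  TRITONSERVER_ServerOptionsSetModelRepositoryPath(opts, "/models");  // hypothetical path
  TRITONSERVER_Server* server = nullptr;
  TRITONSERVER_Error* err = TRITONSERVER_ServerNew(&server, opts);
  TRITONSERVER_ServerOptionsDelete(opts);  // options may be released once the server exists
  if (err != nullptr) {
    // inspect / log the error, then release it
    TRITONSERVER_ErrorDelete(err);
  } else {
    // ... use the server ...
    TRITONSERVER_ServerDelete(server);  // stops the server first if still running
  }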
/// Delete a server object. If server is not already stopped it is
/// stopped before being deleted.
///
/// \param server The inference server object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerDelete(TRITONSERVER_Server* server);
/// Stop a server object. A server can't be restarted once it is
/// stopped.
///
/// \param server The inference server object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerStop(TRITONSERVER_Server* server);
/// Register a new model repository. Not available in polling mode.
///
/// \param server The inference server object.
/// \param repository_path The full path to the model repository.
/// \param name_mapping List of name_mapping parameters. Each mapping has
/// the model directory name as its key, overridden model name as its value.
/// \param mapping_count Number of mappings provided.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerRegisterModelRepository(
    TRITONSERVER_Server* server, const char* repository_path,
    const TRITONSERVER_Parameter** name_mapping, const uint32_t mapping_count);
/// Unregister a model repository. Not available in polling mode.
///
/// \param server The inference server object.
/// \param repository_path The full path to the model repository.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnregisterModelRepository(
    TRITONSERVER_Server* server, const char* repository_path);
/// Check the model repository for changes and update server state
/// based on those changes.
///
/// \param server The inference server object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server* server);
/// Is the server live?
///
/// \param server The inference server object.
/// \param live Returns true if server is live, false otherwise.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerIsLive(TRITONSERVER_Server* server, bool* live);
/// Is the server ready?
///
/// \param server The inference server object.
/// \param ready Returns true if server is ready, false otherwise.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerIsReady(TRITONSERVER_Server* server, bool* ready);
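A short health-check sketch using the two calls above, assuming 'server' was created as in the earlier lifecycle example; error returns are ignored for brevity.
  bool live = false;
  bool ready = false;
  TRITONSERVER_ServerIsLive(server, &live);
  TRITONSERVER_ServerIsReady(server, &ready);
  if (!live || !ready) {
    // wait and poll again, or abort startup
  }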
/// Is the model ready?
///
/// \param server The inference server object.
/// \param model_name The name of the model to get readiness for.
/// \param model_version The version of the model to get readiness
/// for. If -1 then the server will choose a version based on the
/// model's policy.
/// \param ready Returns true if the model is ready, false otherwise.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelIsReady(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, bool* ready);
/// Get the batch properties of the model. The properties are
/// communicated by a flags value and an (optional) object returned by
/// 'voidp'.
///
/// - TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the
/// batching properties of the model. This means that the model
/// does not support batching in any way that is useable by
/// Triton. The returned 'voidp' value is nullptr.
///
/// - TRITONSERVER_BATCH_FIRST_DIM: The model supports batching
/// along the first dimension of every input and output
/// tensor. Triton schedulers that perform batching can
/// automatically batch inference requests along this dimension.
/// The returned 'voidp' value is nullptr.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param flags Returns flags indicating the batch properties of the
/// model.
/// \param voidp If non-nullptr, returns a pointer specific to the
/// 'flags' value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelBatchProperties(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, uint32_t* flags, void** voidp);
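A sketch of testing the returned flags value; the model name is hypothetical and the error return is ignored for brevity.
  uint32_t flags = 0;
  TRITONSERVER_ServerModelBatchProperties(
      server, "my_model", -1 /* version chosen by policy */, &flags, nullptr);
  if (flags & TRITONSERVER_BATCH_FIRST_DIM) {
    // requests for this model can be batched along the first dimension
  }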
/// Get the transaction policy of the model. The policy is
/// communicated by a flags value.
///
/// - TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly
/// one response per request.
///
/// - TRITONSERVER_TXN_DECOUPLED: The model may generate zero
/// to many responses per request.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param txn_flags Returns flags indicating the transaction policy of the
/// model.
/// \param voidp If non-nullptr, returns a pointer specific to the 'flags' value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelTransactionProperties(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, uint32_t* txn_flags, void** voidp);
/// Get the metadata of the server as a TRITONSERVER_Message object.
/// The caller takes ownership of the message object and must call
/// TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param server_metadata Returns the server metadata message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerMetadata(
    TRITONSERVER_Server* server, TRITONSERVER_Message** server_metadata);
/// Get the metadata of a model as a TRITONSERVER_Message
/// object. The caller takes ownership of the message object and must
/// call TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model.
/// If -1 then the server will choose a version based on the model's
/// policy.
/// \param model_metadata Returns the model metadata message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelMetadata(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, TRITONSERVER_Message** model_metadata);
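A sketch of retrieving model metadata and reading it as JSON. TRITONSERVER_MessageSerializeToJson and TRITONSERVER_MessageDelete are assumed to be declared earlier in this header; the model name is hypothetical.
  TRITONSERVER_Message* metadata = nullptr;
  TRITONSERVER_Error* err =
      TRITONSERVER_ServerModelMetadata(server, "my_model", -1, &metadata);
  if (err == nullptr) {
    const char* base = nullptr;
    size_t byte_size = 0;
    TRITONSERVER_MessageSerializeToJson(metadata, &base, &byte_size);
    // 'base' points at a JSON string of 'byte_size' bytes, valid until the
    // message object is deleted.
    TRITONSERVER_MessageDelete(metadata);
  }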
/// Get the statistics of a model as a TRITONSERVER_Message
/// object. The caller takes ownership of the object and must call
/// TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// If empty, then statistics for all available models will be returned,
/// and the server will choose a version based on those models' policies.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param model_stats Returns the model statistics message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelStatistics(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, TRITONSERVER_Message** model_stats);
/// Get the configuration of a model as a TRITONSERVER_Message object.
/// The caller takes ownership of the message object and must call
/// TRITONSERVER_MessageDelete to release the object.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param model_version The version of the model. If -1 then the
/// server will choose a version based on the model's policy.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned. Currently only version 1 is supported.
/// \param model_config Returns the model config message.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelConfig(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, const uint32_t config_version,
    TRITONSERVER_Message** model_config);
/// Get the index of all unique models in the model repositories as a
/// TRITONSERVER_Message object. The caller takes ownership of the
/// message object and must call TRITONSERVER_MessageDelete to release
/// the object.
///
/// If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models
/// that are loaded into the server and ready for inferencing are
/// returned.
///
/// \param server The inference server object.
/// \param flags TRITONSERVER_ModelIndexFlag flags that control how to
/// collect the index.
/// \param model_index Return the model index message that holds the
/// index of all models contained in the server's model repository(s).
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelIndex(
    TRITONSERVER_Server* server, uint32_t flags,
    TRITONSERVER_Message** model_index);
/// Load the requested model or reload the model if it is already
/// loaded. The function does not return until the model is loaded or
/// fails to load. Returned error indicates if model loaded
/// successfully or not.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerLoadModel(
    TRITONSERVER_Server* server, const char* model_name);
/// Load the requested model or reload the model if it is already
/// loaded, with load parameters provided. The function does not return until
/// the model is loaded or fails to load. Returned error indicates if model
/// loaded successfully or not.
/// Currently the below parameter names are recognized:
/// - "config" : string parameter that contains a JSON representation of the
/// model configuration. This config will be used for loading the model instead
/// of the one in the model directory.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \param parameters The array of load parameters.
/// \param parameter_count The number of parameters.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerLoadModelWithParameters(
    TRITONSERVER_Server* server, const char* model_name,
    const TRITONSERVER_Parameter** parameters, const uint64_t parameter_count);
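A sketch of loading a model with a "config" override. TRITONSERVER_ParameterNew, TRITONSERVER_ParameterDelete and TRITONSERVER_PARAMETER_STRING are assumed to be declared earlier in this header; the model name and the JSON override are hypothetical, and the error return is left unhandled for brevity.
  const char* config_json = "{\"max_batch_size\": 8}";  // hypothetical override
  TRITONSERVER_Parameter* config_param = TRITONSERVER_ParameterNew(
      "config", TRITONSERVER_PARAMETER_STRING, config_json);
  const TRITONSERVER_Parameter* params[] = {config_param};
  TRITONSERVER_Error* err = TRITONSERVER_ServerLoadModelWithParameters(
      server, "my_model", params, 1 /* parameter_count */);
  TRITONSERVER_ParameterDelete(config_param);  // safe once the call returns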
/// Unload the requested model. Unloading a model that is not loaded
/// on the server has no effect and a success code will be returned.
/// The function does not wait for the requested model to be fully unloaded
/// and a success code will be returned.
/// Returned error indicates if model unloaded successfully or not.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnloadModel(
    TRITONSERVER_Server* server, const char* model_name);
/// Unload the requested model, and also unload any dependent model that
/// was loaded along with the requested model (for example, the models composing
/// an ensemble). Unloading a model that is not loaded
/// on the server has no effect and a success code will be returned.
/// The function does not wait for the requested model and all dependent
/// models to be fully unloaded and a success code will be returned.
/// Returned error indicates if model unloaded successfully or not.
///
/// \param server The inference server object.
/// \param model_name The name of the model.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnloadModelAndDependents(
    TRITONSERVER_Server* server, const char* model_name);
/// Get the current metrics for the server. The caller takes ownership
/// of the metrics object and must call TRITONSERVER_MetricsDelete to
/// release the object.
///
/// \param server The inference server object.
/// \param metrics Returns the metrics.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerMetrics(
    TRITONSERVER_Server* server, TRITONSERVER_Metrics** metrics);
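A sketch of exporting the current metrics in Prometheus text format. TRITONSERVER_MetricsFormatted, TRITONSERVER_METRIC_PROMETHEUS and TRITONSERVER_MetricsDelete are assumed to be declared earlier in this header.
  TRITONSERVER_Metrics* metrics = nullptr;
  TRITONSERVER_Error* err = TRITONSERVER_ServerMetrics(server, &metrics);
  if (err == nullptr) {
    const char* base = nullptr;
    size_t byte_size = 0;
    TRITONSERVER_MetricsFormatted(
        metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size);
    // 'base' holds the Prometheus exposition text, valid until the metrics
    // object is deleted.
    TRITONSERVER_MetricsDelete(metrics);
  }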
/// Perform inference using the meta-data and inputs supplied by the
/// 'inference_request'. If the function returns success, then the
/// caller releases ownership of 'inference_request' and must not
/// access it in any way after this call, until ownership is returned
/// via the 'request_release_fn' callback registered in the request
/// object with TRITONSERVER_InferenceRequestSetReleaseCallback.
///
/// The function unconditionally takes ownership of 'trace' and so the
/// caller must not access it in any way after this call (except in
/// the trace activity callbacks) until ownership is returned via the
/// trace's release_fn callback.
///
/// Responses produced for this request are returned using the
/// allocator and callback registered with the request by
/// TRITONSERVER_InferenceRequestSetResponseCallback.
///
/// \param server The inference server object.
/// \param inference_request The request object.
/// \param trace The trace object for this request, or nullptr if no
/// tracing.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerInferAsync(
    TRITONSERVER_Server* server,
    TRITONSERVER_InferenceRequest* inference_request,
    TRITONSERVER_InferenceTrace* trace);
/// TRITONSERVER_MetricKind
///
/// Types of metrics recognized by TRITONSERVER.
///
typedef enum TRITONSERVER_metrickind_enum {
  TRITONSERVER_METRIC_KIND_COUNTER,
  TRITONSERVER_METRIC_KIND_GAUGE
} TRITONSERVER_MetricKind;
/// Create a new metric family object. The caller takes ownership of the
/// TRITONSERVER_MetricFamily object and must call
/// TRITONSERVER_MetricFamilyDelete to release the object.
///
/// \param family Returns the new metric family object.
/// \param kind The type of metric family to create.
/// \param name The name of the metric family seen when calling the metrics
/// endpoint.
/// \param description The description of the metric family seen when
/// calling the metrics endpoint.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricFamilyNew(
    TRITONSERVER_MetricFamily** family, const TRITONSERVER_MetricKind kind,
    const char* name, const char* description);
/// Delete a metric family object.
/// A TRITONSERVER_MetricFamily* object should be deleted AFTER its
/// corresponding TRITONSERVER_Metric* objects have been deleted.
/// Attempting to delete a family before its metrics will return an error.
///
/// \param family The metric family object to delete.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricFamilyDelete(TRITONSERVER_MetricFamily* family);
/// Create a new metric object. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
/// responsible for ownership of the labels passed in. Each label can be deleted
/// immediately after creating the metric with TRITONSERVER_ParameterDelete
/// if not re-using the labels.
///
/// \param metric Returns the new metric object.
/// \param family The metric family to add this new metric to.
/// \param labels The array of labels to associate with this new metric.
/// \param label_count The number of labels.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricNew(
    TRITONSERVER_Metric** metric, TRITONSERVER_MetricFamily* family,
    const TRITONSERVER_Parameter** labels, const uint64_t label_count);
/// Delete a metric object.
/// All TRITONSERVER_Metric* objects should be deleted BEFORE their
/// corresponding TRITONSERVER_MetricFamily* objects have been deleted.
/// If a family is deleted before its metrics, an error will be returned.
///
/// \param metric The metric object to delete.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricDelete(TRITONSERVER_Metric* metric);
/// Get the current value of a metric object.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_COUNTER
/// and TRITONSERVER_METRIC_KIND_GAUGE, and returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
///
/// \param metric The metric object to query.
/// \param value Returns the current value of the metric object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricValue(TRITONSERVER_Metric* metric, double* value);
/// Increment the current value of metric by value.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE for any value,
/// and TRITONSERVER_METRIC_KIND_COUNTER for non-negative values. Returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind
/// and TRITONSERVER_ERROR_INVALID_ARG for negative values on a
/// TRITONSERVER_METRIC_KIND_COUNTER metric.
///
/// \param metric The metric object to update.
/// \param value The amount to increment the metric's value by.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricIncrement(TRITONSERVER_Metric* metric, double value);
/// Set the current value of metric to value.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE and returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
///
/// \param metric The metric object to update.
/// \param value The amount to set metric's value to.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricSet(TRITONSERVER_Metric* metric, double value);
/// Get the TRITONSERVER_MetricKind of metric and its corresponding family.
///
/// \param metric The metric object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of metric.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_GetMetricKind(
    TRITONSERVER_Metric* metric, TRITONSERVER_MetricKind* kind);
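A sketch tying the custom-metric calls above together: create a counter family and one unlabeled metric, increment it, then delete the metric before its family as the documentation requires. The family name and description are hypothetical and error returns are ignored for brevity.
  TRITONSERVER_MetricFamily* family = nullptr;
  TRITONSERVER_MetricFamilyNew(
      &family, TRITONSERVER_METRIC_KIND_COUNTER, "custom_requests_total",
      "Total custom requests seen");  // hypothetical name and description
  TRITONSERVER_Metric* metric = nullptr;
  TRITONSERVER_MetricNew(&metric, family, nullptr /* no labels */, 0);
  TRITONSERVER_MetricIncrement(metric, 1.0);
  // Metrics must be deleted before their family.
  TRITONSERVER_MetricDelete(metric);
  TRITONSERVER_MetricFamilyDelete(family);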
#ifdef __cplusplus
}
#endif
3rdparty/core-r22.12/src/backend_config.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_config.h"
#include "status.h"
#include "triton/common/logging.h"
#include "triton/common/model_config.h"
namespace triton { namespace core {

namespace {

Status
GetTFSpecializedBackendName(
    const triton::common::BackendCmdlineConfigMap& config_map,
    std::string* specialized_name)
{
  std::string tf_version_str = "2";
  const auto& itr = config_map.find("tensorflow");
  if (itr != config_map.end()) {
    if (BackendConfiguration(itr->second, "version", &tf_version_str).IsOk()) {
      if ((tf_version_str != "1") && (tf_version_str != "2")) {
        return Status(
            Status::Code::INVALID_ARG,
            "unexpected TensorFlow library version '" + tf_version_str +
                "', expects 1 or 2.");
      }
    }
  }

  *specialized_name += tf_version_str;

  return Status::Success;
}

}  // namespace

Status
BackendConfiguration(
    const triton::common::BackendCmdlineConfig& config, const std::string& key,
    std::string* val)
{
  for (const auto& pr : config) {
    if (pr.first == key) {
      *val = pr.second;
      return Status::Success;
    }
  }
  return Status(
      Status::Code::INTERNAL,
      std::string("unable to find common backend configuration for '") + key +
          "'");
}

Status
BackendConfigurationParseStringToDouble(const std::string& str, double* val)
{
  try {
    *val = std::stod(str);
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        "unable to parse common backend configuration as double");
  }
  return Status::Success;
}

Status
BackendConfigurationParseStringToBool(const std::string& str, bool* val)
{
  try {
    std::string lowercase_str{str};
    std::transform(
        lowercase_str.begin(), lowercase_str.end(), lowercase_str.begin(),
        [](unsigned char c) { return std::tolower(c); });
    *val = (lowercase_str == "true");
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        "unable to parse common backend configuration as bool");
  }
  return Status::Success;
}

Status
BackendConfigurationGlobalBackendsDirectory(
    const triton::common::BackendCmdlineConfigMap& config_map,
    std::string* dir)
{
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL,
        "unable to find global backends directory configuration");
  }
  RETURN_IF_ERROR(BackendConfiguration(itr->second, "backend-directory", dir));
  return Status::Success;
}

Status
BackendConfigurationMinComputeCapability(
    const triton::common::BackendCmdlineConfigMap& config_map, double* mcc)
{
#ifdef TRITON_ENABLE_GPU
  *mcc = TRITON_MIN_COMPUTE_CAPABILITY;
#else
  *mcc = 0;
#endif  // TRITON_ENABLE_GPU
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL, "unable to find common backend configuration");
  }
  std::string min_compute_capability_str;
  RETURN_IF_ERROR(BackendConfiguration(
      itr->second, "min-compute-capability", &min_compute_capability_str));
  RETURN_IF_ERROR(BackendConfigurationParseStringToDouble(
      min_compute_capability_str, mcc));
  return Status::Success;
}

Status
BackendConfigurationAutoCompleteConfig(
    const triton::common::BackendCmdlineConfigMap& config_map, bool* acc)
{
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL, "unable to find auto-complete configuration");
  }
  std::string auto_complete_config_str;
  RETURN_IF_ERROR(BackendConfiguration(
      itr->second, "auto-complete-config", &auto_complete_config_str));
  RETURN_IF_ERROR(BackendConfigurationParseStringToBool(
      auto_complete_config_str, acc));
  return Status::Success;
}

Status
BackendConfigurationSpecializeBackendName(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const std::string& backend_name, std::string* specialized_name)
{
  *specialized_name = backend_name;
  if (backend_name == "tensorflow") {
    RETURN_IF_ERROR(GetTFSpecializedBackendName(config_map, specialized_name));
  }
  return Status::Success;
}

Status
BackendConfigurationBackendLibraryName(
    const std::string& backend_name, std::string* libname)
{
#ifdef _WIN32
  *libname = "triton_" + backend_name + ".dll";
#else
  *libname = "libtriton_" + backend_name + ".so";
#endif
  return Status::Success;
}

Status
BackendConfigurationModelLoadGpuFraction(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const int device_id, double* memory_limit)
{
  *memory_limit = 1.0;
  const auto& itr = config_map.find(std::string());
  if (itr == config_map.end()) {
    return Status(
        Status::Code::INTERNAL,
        "unable to find global backends directory configuration");
  }
  static std::string key_prefix = "model-load-gpu-limit-device-";
  std::string memory_limit_str;
  auto status = BackendConfiguration(
      itr->second, key_prefix + std::to_string(device_id), &memory_limit_str);
  // Allow missing key, default to 1.0 (no limit) if the limit is not specified
  if (status.IsOk()) {
    RETURN_IF_ERROR(BackendConfigurationParseStringToDouble(
        memory_limit_str, memory_limit));
  }
  return Status::Success;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_config.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "status.h"
#include "triton/common/model_config.h"
namespace triton { namespace core {

/// Get a key's string value from a backend configuration.
Status BackendConfiguration(
    const triton::common::BackendCmdlineConfig& config, const std::string& key,
    std::string* val);

/// Convert a backend configuration string value into a double.
Status BackendConfigurationParseStringToDouble(
    const std::string& str, double* val);

/// Convert a backend configuration string value into a bool.
Status BackendConfigurationParseStringToBool(const std::string& str, bool* val);

/// Get the global backends directory from the backend configuration.
Status BackendConfigurationGlobalBackendsDirectory(
    const triton::common::BackendCmdlineConfigMap& config_map,
    std::string* dir);

/// Get the minimum compute capability from the backend configuration.
Status BackendConfigurationMinComputeCapability(
    const triton::common::BackendCmdlineConfigMap& config_map, double* mcc);

/// Get the model configuration auto-complete setting from the backend
/// configuration.
Status BackendConfigurationAutoCompleteConfig(
    const triton::common::BackendCmdlineConfigMap& config_map, bool* acc);

/// Convert a backend name to the specialized version of that name
/// based on the backend configuration. For example, "tensorflow" will
/// convert to either "tensorflow1" or "tensorflow2" depending on how
/// tritonserver is run.
Status BackendConfigurationSpecializeBackendName(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const std::string& backend_name, std::string* specialized_name);

/// Return the shared library name for a backend.
Status BackendConfigurationBackendLibraryName(
    const std::string& backend_name, std::string* libname);

/// Get GPU memory limit fraction for model loading
/// from the backend configuration.
Status BackendConfigurationModelLoadGpuFraction(
    const triton::common::BackendCmdlineConfigMap& config_map,
    const int device_id, double* memory_limit);

}}  // namespace triton::core
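A hedged sketch of how the helpers declared above compose when reading the unnamed "common" backend configuration; ReadCommonSettings is a hypothetical caller, config_map comes from the command line, and RETURN_IF_ERROR is assumed from status.h.
  // Hypothetical caller: pull the common settings out of a config map.
  triton::core::Status
  ReadCommonSettings(const triton::common::BackendCmdlineConfigMap& config_map)
  {
    std::string dir;
    RETURN_IF_ERROR(triton::core::BackendConfigurationGlobalBackendsDirectory(
        config_map, &dir));
    double mcc = 0.0;
    RETURN_IF_ERROR(triton::core::BackendConfigurationMinComputeCapability(
        config_map, &mcc));
    bool auto_complete = false;
    RETURN_IF_ERROR(triton::core::BackendConfigurationAutoCompleteConfig(
        config_map, &auto_complete));
    return triton::core::Status::Success;
  }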
3rdparty/core-r22.12/src/backend_manager.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_manager.h"
#include "backend_memory_manager.h"
#include "server_message.h"
#include "shared_library.h"
#include "triton/common/logging.h"
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

//
// TritonBackend
//
Status
TritonBackend::Create(
    const std::string& name, const std::string& dir,
    const std::string& libpath,
    const triton::common::BackendCmdlineConfig& backend_cmdline_config,
    std::shared_ptr<TritonBackend>* backend)
{
  // Create the JSON representation of the backend configuration.
  triton::common::TritonJson::Value backend_config_json(
      triton::common::TritonJson::ValueType::OBJECT);
  if (!backend_cmdline_config.empty()) {
    triton::common::TritonJson::Value cmdline_json(
        backend_config_json, triton::common::TritonJson::ValueType::OBJECT);
    for (const auto& pr : backend_cmdline_config) {
      RETURN_IF_ERROR(cmdline_json.AddString(pr.first.c_str(), pr.second));
    }

    RETURN_IF_ERROR(
        backend_config_json.Add("cmdline", std::move(cmdline_json)));
  }

  TritonServerMessage backend_config(backend_config_json);

  auto local_backend = std::shared_ptr<TritonBackend>(
      new TritonBackend(name, dir, libpath, backend_config));

  // Load the library and initialize all the entrypoints
  RETURN_IF_ERROR(local_backend->LoadBackendLibrary());

  // Backend initialization is optional... The TRITONBACKEND_Backend
  // object is this TritonBackend object. We must set the shared
  // library path to point to the backend directory in case the
  // backend library attempts to load additional shared libraries.
  if (local_backend->backend_init_fn_ != nullptr) {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));
    RETURN_IF_ERROR(slib->SetLibraryDirectory(local_backend->dir_));

    TRITONSERVER_Error* err = local_backend->backend_init_fn_(
        reinterpret_cast<TRITONBACKEND_Backend*>(local_backend.get()));

    RETURN_IF_ERROR(slib->ResetLibraryDirectory());
    RETURN_IF_TRITONSERVER_ERROR(err);
  }

  local_backend->UpdateAttributes();

  *backend = std::move(local_backend);
  return Status::Success;
}

Status
TritonBackend::UpdateAttributes()
{
  if (backend_attri_fn_ == nullptr) {
    return Status::Success;
  }

  // Create an Attribute object for the backend to fill, note that it copies
  // some fields from 'attributes_' while the others use default value. This
  // is an ad hoc way to determine whether the attribute is set by the backend
  // and keep / update current value.
  Attribute latest;
  latest.exec_policy_ = attributes_.exec_policy_;
  RETURN_IF_TRITONSERVER_ERROR(backend_attri_fn_(
      reinterpret_cast<TRITONBACKEND_Backend*>(this),
      reinterpret_cast<TRITONBACKEND_BackendAttribute*>(&latest)));

  // Update attributes that were set
  attributes_.exec_policy_ = latest.exec_policy_;
  if (!latest.preferred_groups_.empty()) {
    attributes_.preferred_groups_ = latest.preferred_groups_;
  }
  return Status::Success;
}

TritonBackend::TritonBackend(
    const std::string& name, const std::string& dir,
    const std::string& libpath, const TritonServerMessage& backend_config)
    : name_(name), dir_(dir), libpath_(libpath),
      backend_config_(backend_config), state_(nullptr)
{
  ClearHandles();
}

TritonBackend::~TritonBackend()
{
  LOG_VERBOSE(1) << "unloading backend '" << name_ << "'";

  // Backend finalization is optional... The TRITONBACKEND_Backend
  // object is this TritonBackend object.
  if (backend_fini_fn_ != nullptr) {
    LOG_TRITONSERVER_ERROR(
        backend_fini_fn_(reinterpret_cast<TRITONBACKEND_Backend*>(this)),
        "failed finalizing backend");
  }

  ClearHandles();
}

void
TritonBackend::ClearHandles()
{
  dlhandle_ = nullptr;
  backend_init_fn_ = nullptr;
  backend_fini_fn_ = nullptr;
  backend_attri_fn_ = nullptr;
  model_init_fn_ = nullptr;
  model_fini_fn_ = nullptr;
  inst_init_fn_ = nullptr;
  inst_fini_fn_ = nullptr;
  inst_exec_fn_ = nullptr;
}

Status
TritonBackend::LoadBackendLibrary()
{
  TritonBackendInitFn_t bifn;
  TritonBackendFiniFn_t bffn;
  TritonBackendAttriFn_t bafn;
  TritonModelInitFn_t mifn;
  TritonModelFiniFn_t mffn;
  TritonModelInstanceInitFn_t iifn;
  TritonModelInstanceFiniFn_t iffn;
  TritonModelInstanceExecFn_t iefn;

  {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));

    RETURN_IF_ERROR(slib->OpenLibraryHandle(libpath_, &dlhandle_));

    // Backend initialize and finalize functions, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_Initialize", true /* optional */,
        reinterpret_cast<void**>(&bifn)));
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_Finalize", true /* optional */,
        reinterpret_cast<void**>(&bffn)));

    // Backend attribute function, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_GetBackendAttribute", true /* optional */,
        reinterpret_cast<void**>(&bafn)));

    // Model initialize and finalize functions, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInitialize", true /* optional */,
        reinterpret_cast<void**>(&mifn)));
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelFinalize", true /* optional */,
        reinterpret_cast<void**>(&mffn)));

    // Model instance initialize and finalize functions, optional
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInstanceInitialize", true /* optional */,
        reinterpret_cast<void**>(&iifn)));
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInstanceFinalize", true /* optional */,
        reinterpret_cast<void**>(&iffn)));

    // Model instance execute function, required
    RETURN_IF_ERROR(slib->GetEntrypoint(
        dlhandle_, "TRITONBACKEND_ModelInstanceExecute", false /* optional */,
        reinterpret_cast<void**>(&iefn)));
  }

  backend_init_fn_ = bifn;
  backend_fini_fn_ = bffn;
  backend_attri_fn_ = bafn;
  model_init_fn_ = mifn;
  model_fini_fn_ = mffn;
  inst_init_fn_ = iifn;
  inst_fini_fn_ = iffn;
  inst_exec_fn_ = iefn;

  return Status::Success;
}

extern "C" {

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ApiVersion(uint32_t* major, uint32_t* minor)
{
  *major = TRITONBACKEND_API_VERSION_MAJOR;
  *minor = TRITONBACKEND_API_VERSION_MINOR;
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendName(TRITONBACKEND_Backend* backend, const char** name)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *name = tb->Name().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendConfig(
    TRITONBACKEND_Backend* backend, TRITONSERVER_Message** backend_config)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *backend_config = const_cast<TRITONSERVER_Message*>(
      reinterpret_cast<const TRITONSERVER_Message*>(&tb->BackendConfig()));
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy* policy)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *policy = tb->ExecutionPolicy();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendSetExecutionPolicy(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy policy)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  tb->SetExecutionPolicy(policy);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendArtifacts(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *artifact_type = TRITONBACKEND_ARTIFACT_FILESYSTEM;
  *location = tb->Directory().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendMemoryManager(
    TRITONBACKEND_Backend* backend, TRITONBACKEND_MemoryManager** manager)
{
  static TritonMemoryManager gMemoryManager;
  *manager = reinterpret_cast<TRITONBACKEND_MemoryManager*>(&gMemoryManager);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendState(TRITONBACKEND_Backend* backend, void** state)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  *state = tb->State();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendSetState(TRITONBACKEND_Backend* backend, void* state)
{
  TritonBackend* tb = reinterpret_cast<TritonBackend*>(backend);
  tb->SetState(state);
  return nullptr;  // success
}

}  // extern C

//
// TritonBackendManager
//

static std::weak_ptr<TritonBackendManager> backend_manager_;
static std::mutex mu_;

Status
TritonBackendManager::Create(std::shared_ptr<TritonBackendManager>* manager)
{
  std::lock_guard<std::mutex> lock(mu_);

  // If there is already a manager then we just use it...
  *manager = backend_manager_.lock();
  if (*manager != nullptr) {
    return Status::Success;
  }

  manager->reset(new TritonBackendManager());
  backend_manager_ = *manager;

  return Status::Success;
}

Status
TritonBackendManager::CreateBackend(
    const std::string& name, const std::string& dir,
    const std::string& libpath,
    const triton::common::BackendCmdlineConfig& backend_cmdline_config,
    std::shared_ptr<TritonBackend>* backend)
{
  std::lock_guard<std::mutex> lock(mu_);

  const auto& itr = backend_map_.find(libpath);
  if (itr != backend_map_.end()) {
    *backend = itr->second;
    return Status::Success;
  }

  RETURN_IF_ERROR(TritonBackend::Create(
      name, dir, libpath, backend_cmdline_config, backend));
  backend_map_.insert({libpath, *backend});

  return Status::Success;
}

Status
TritonBackendManager::BackendState(
    std::unique_ptr<std::unordered_map<std::string, std::vector<std::string>>>*
        backend_state)
{
  std::lock_guard<std::mutex> lock(mu_);

  std::unique_ptr<std::unordered_map<std::string, std::vector<std::string>>>
      backend_state_map(
          new std::unordered_map<std::string, std::vector<std::string>>);
  for (const auto& backend_pair : backend_map_) {
    auto& libpath = backend_pair.first;
    auto backend = backend_pair.second;

    const char* backend_config;
    size_t backend_config_size;
    backend->BackendConfig().Serialize(&backend_config, &backend_config_size);
    backend_state_map->insert(
        {backend->Name(), std::vector<std::string>{libpath, backend_config}});
  }

  *backend_state = std::move(backend_state_map);

  return Status::Success;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_manager.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include "constants.h"
#include "server_message.h"
#include "status.h"
#include "triton/common/model_config.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

//
// Proxy to a backend shared library.
//
class TritonBackend {
 public:
  struct Attribute {
    Attribute() : exec_policy_(TRITONBACKEND_EXECUTION_BLOCKING) {}
    TRITONBACKEND_ExecutionPolicy exec_policy_;
    std::vector<inference::ModelInstanceGroup> preferred_groups_;
  };

  typedef TRITONSERVER_Error* (*TritonModelInitFn_t)(
      TRITONBACKEND_Model* model);
  typedef TRITONSERVER_Error* (*TritonModelFiniFn_t)(
      TRITONBACKEND_Model* model);
  typedef TRITONSERVER_Error* (*TritonModelInstanceInitFn_t)(
      TRITONBACKEND_ModelInstance* instance);
  typedef TRITONSERVER_Error* (*TritonModelInstanceFiniFn_t)(
      TRITONBACKEND_ModelInstance* instance);
  typedef TRITONSERVER_Error* (*TritonModelInstanceExecFn_t)(
      TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
      const uint32_t request_cnt);

  static Status Create(
      const std::string& name, const std::string& dir,
      const std::string& libpath,
      const triton::common::BackendCmdlineConfig& backend_cmdline_config,
      std::shared_ptr<TritonBackend>* backend);
  ~TritonBackend();

  const std::string& Name() const { return name_; }
  const std::string& Directory() const { return dir_; }
  const TritonServerMessage& BackendConfig() const { return backend_config_; }
  const Attribute& BackendAttributes() const { return attributes_; }

  TRITONBACKEND_ExecutionPolicy ExecutionPolicy() const
  {
    return attributes_.exec_policy_;
  }
  void SetExecutionPolicy(const TRITONBACKEND_ExecutionPolicy policy)
  {
    attributes_.exec_policy_ = policy;
  }

  void* State() { return state_; }
  void SetState(void* state) { state_ = state; }

  TritonModelInitFn_t ModelInitFn() const { return model_init_fn_; }
  TritonModelFiniFn_t ModelFiniFn() const { return model_fini_fn_; }
  TritonModelInstanceInitFn_t ModelInstanceInitFn() const
  {
    return inst_init_fn_;
  }
  TritonModelInstanceFiniFn_t ModelInstanceFiniFn() const
  {
    return inst_fini_fn_;
  }
  TritonModelInstanceExecFn_t ModelInstanceExecFn() const
  {
    return inst_exec_fn_;
  }

 private:
  typedef TRITONSERVER_Error* (*TritonBackendInitFn_t)(
      TRITONBACKEND_Backend* backend);
  typedef TRITONSERVER_Error* (*TritonBackendFiniFn_t)(
      TRITONBACKEND_Backend* backend);
  typedef TRITONSERVER_Error* (*TritonBackendAttriFn_t)(
      TRITONBACKEND_Backend* backend,
      TRITONBACKEND_BackendAttribute* backend_attributes);

  TritonBackend(
      const std::string& name, const std::string& dir,
      const std::string& libpath, const TritonServerMessage& backend_config);

  void ClearHandles();
  Status LoadBackendLibrary();
  Status UpdateAttributes();

  // The name of the backend.
  const std::string name_;

  // Full path to the directory holding backend shared library and
  // other artifacts.
  const std::string dir_;

  // Full path to the backend shared library.
  const std::string libpath_;

  // Backend configuration as JSON
  TritonServerMessage backend_config_;

  // backend attributes
  Attribute attributes_;

  // dlopen / dlsym handles
  void* dlhandle_;
  TritonBackendInitFn_t backend_init_fn_;
  TritonBackendFiniFn_t backend_fini_fn_;
  TritonBackendAttriFn_t backend_attri_fn_;
  TritonModelInitFn_t model_init_fn_;
  TritonModelFiniFn_t model_fini_fn_;
  TritonModelInstanceInitFn_t inst_init_fn_;
  TritonModelInstanceFiniFn_t inst_fini_fn_;
  TritonModelInstanceExecFn_t inst_exec_fn_;

  // Opaque state associated with the backend.
  void* state_;
};

//
// Manage communication with Triton backends and their lifecycle.
//
class TritonBackendManager {
 public:
  static Status Create(std::shared_ptr<TritonBackendManager>* manager);

  Status CreateBackend(
      const std::string& name, const std::string& dir,
      const std::string& libpath,
      const triton::common::BackendCmdlineConfig& backend_cmdline_config,
      std::shared_ptr<TritonBackend>* backend);

  Status BackendState(
      std::unique_ptr<
          std::unordered_map<std::string, std::vector<std::string>>>*
          backend_state);

 private:
  DISALLOW_COPY_AND_ASSIGN(TritonBackendManager);
  TritonBackendManager() = default;

  std::unordered_map<std::string, std::shared_ptr<TritonBackend>> backend_map_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_memory_manager.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_memory_manager.h"
#include "pinned_memory_manager.h"
#include "status.h"
#include "tritonserver_apis.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#include "cuda_memory_manager.h"
#endif // TRITON_ENABLE_GPU
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

extern "C" {

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_MemoryManagerAllocate(
    TRITONBACKEND_MemoryManager* manager, void** buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id,
    const uint64_t byte_size)
{
  switch (memory_type) {
    case TRITONSERVER_MEMORY_GPU:
#ifdef TRITON_ENABLE_GPU
    {
      auto status = CudaMemoryManager::Alloc(buffer, byte_size, memory_type_id);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.ErrorCode()), status.Message().c_str());
      }
      break;
    }
#else
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED, "GPU memory allocation not supported");
#endif  // TRITON_ENABLE_GPU

    case TRITONSERVER_MEMORY_CPU_PINNED:
#ifdef TRITON_ENABLE_GPU
    {
      TRITONSERVER_MemoryType mt = memory_type;
      auto status = PinnedMemoryManager::Alloc(buffer, byte_size, &mt, false);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.ErrorCode()), status.Message().c_str());
      }
      break;
    }
#else
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Pinned memory allocation not supported");
#endif  // TRITON_ENABLE_GPU

    case TRITONSERVER_MEMORY_CPU: {
      *buffer = malloc(byte_size);
      if (*buffer == nullptr) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_UNAVAILABLE, "CPU memory allocation failed");
      }
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_MemoryManagerFree(
    TRITONBACKEND_MemoryManager* manager, void* buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id)
{
  switch (memory_type) {
    case TRITONSERVER_MEMORY_GPU: {
#ifdef TRITON_ENABLE_GPU
      auto status = CudaMemoryManager::Free(buffer, memory_type_id);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
      }
#endif  // TRITON_ENABLE_GPU
      break;
    }

    case TRITONSERVER_MEMORY_CPU_PINNED: {
#ifdef TRITON_ENABLE_GPU
      auto status = PinnedMemoryManager::Free(buffer);
      if (!status.IsOk()) {
        return TRITONSERVER_ErrorNew(
            StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
      }
#endif  // TRITON_ENABLE_GPU
      break;
    }

    case TRITONSERVER_MEMORY_CPU:
      free(buffer);
      break;
  }

  return nullptr;  // success
}

}  // extern C

}}  // namespace triton::core
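For context, a backend reaches these two functions through the opaque TRITONBACKEND_MemoryManager handle it gets from the core. A minimal sketch, assuming only the backend API in tritonbackend.h (TRITONBACKEND_BackendMemoryManager is declared there); no particular backend is implied.

#include <cstdint>
#include "triton/core/tritonbackend.h"

// Allocate a plain CPU scratch buffer through the core memory manager.
TRITONSERVER_Error*
AllocateScratchBuffer(
    TRITONBACKEND_Backend* backend, uint64_t byte_size, void** scratch)
{
  TRITONBACKEND_MemoryManager* manager = nullptr;
  TRITONSERVER_Error* err = TRITONBACKEND_BackendMemoryManager(backend, &manager);
  if (err != nullptr) {
    return err;
  }

  // memory_type_id 0 is the conventional id for CPU allocations.
  return TRITONBACKEND_MemoryManagerAllocate(
      manager, scratch, TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */,
      byte_size);
}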
3rdparty/core-r22.12/src/backend_memory_manager.h deleted 100644 → 0
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
namespace triton { namespace core {

// Currently there is just a global memory manager that is used for
// all backends and which simply forwards requests on to the core
// memory manager.
struct TritonMemoryManager {
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_model.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_model.h"
#include <vector>
#include "backend_config.h"
#include "backend_model_instance.h"
#include "dynamic_batch_scheduler.h"
#include "filesystem.h"
#include "model_config_utils.h"
#include "numa_utils.h"
#include "sequence_batch_scheduler.h"
#include "sequence_state.h"
#include "server.h"
#include "server_message.h"
#include "shared_library.h"
#include "triton/common/logging.h"
#include "tritonserver_apis.h"
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

Status
TritonModel::Create(
    InferenceServer* server, const std::string& model_path,
    const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
    const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
    const std::string& model_name, const int64_t version,
    inference::ModelConfig model_config, const bool is_config_provided,
    std::unique_ptr<TritonModel>* model)
{
  model->reset();

  // The model configuration must specify a backend. The name of the
  // corresponding shared library must be libtriton_<backend>.so.
  if (model_config.backend().empty()) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify 'backend' for '" + model_config.name() + "'");
  }

  // Localize the content of the model repository corresponding to
  // 'model_name'. This model holds a handle to the localized content
  // so that it persists as long as the model is loaded.
  std::shared_ptr<LocalizedPath> localized_model_dir;
  RETURN_IF_ERROR(LocalizePath(model_path, &localized_model_dir));

  // Localize paths in backend model config
  // [FIXME] Remove once a more permanent solution is implemented (DLIS-4211)
  RETURN_IF_ERROR(LocalizePythonBackendExecutionEnvironmentPath(
      model_path, &model_config, &localized_model_dir));

  // Get some internal configuration values needed for initialization.
  std::string backend_dir;
  RETURN_IF_ERROR(BackendConfigurationGlobalBackendsDirectory(
      backend_cmdline_config_map, &backend_dir));

  bool auto_complete_config = false;
  RETURN_IF_ERROR(BackendConfigurationAutoCompleteConfig(
      backend_cmdline_config_map, &auto_complete_config));

  double min_compute_capability = 0;
  RETURN_IF_ERROR(BackendConfigurationMinComputeCapability(
      backend_cmdline_config_map, &min_compute_capability));

  std::string specialized_backend_name;
  RETURN_IF_ERROR(BackendConfigurationSpecializeBackendName(
      backend_cmdline_config_map, model_config.backend(),
      &specialized_backend_name));

  std::string backend_libname;
  RETURN_IF_ERROR(BackendConfigurationBackendLibraryName(
      specialized_backend_name, &backend_libname));

  // Get the path to the backend shared library. Search path is
  // version directory, model directory, global backend directory.
  const auto localized_model_path = localized_model_dir->Path();
  const auto version_path =
      JoinPath({localized_model_path, std::to_string(version)});
  const std::string global_path =
      JoinPath({backend_dir, specialized_backend_name});
  const std::vector<std::string> search_paths = {
      version_path, localized_model_path, global_path};

  std::string backend_libdir;
  std::string backend_libpath;
  for (const auto& path : search_paths) {
    const auto full_path = JoinPath({path, backend_libname});
    bool exists = false;
    RETURN_IF_ERROR(FileExists(full_path, &exists));
    if (exists) {
      backend_libdir = path;
      backend_libpath = full_path;
      break;
    }
  }

  if (backend_libpath.empty()) {
    return Status(
        Status::Code::INVALID_ARG,
        "unable to find '" + backend_libname + "' for model '" +
            model_config.name() + "', searched: " + version_path + ", " +
            model_path + ", " + global_path);
  }

  // Resolve the global backend configuration with the specific backend
  // configuration
  triton::common::BackendCmdlineConfig config;
  RETURN_IF_ERROR(ResolveBackendConfigs(
      backend_cmdline_config_map, model_config.backend(), config));

  RETURN_IF_ERROR(SetBackendConfigDefaults(config));

  std::shared_ptr<TritonBackend> backend;
  RETURN_IF_ERROR(server->BackendManager()->CreateBackend(
      model_config.backend(), backend_libdir, backend_libpath, config,
      &backend));

  // Normalize backend-dependent config
  {
    const auto& attributes = backend->BackendAttributes();
    // [WIP] formalize config normalization / validation
    RETURN_IF_ERROR(NormalizeInstanceGroup(
        min_compute_capability, attributes.preferred_groups_, &model_config));
    RETURN_IF_ERROR(
        ValidateInstanceGroup(model_config, min_compute_capability));
  }

  // Create and initialize the model.
  std::unique_ptr<TritonModel> local_model(new TritonModel(
      server, localized_model_dir, backend, min_compute_capability, version,
      model_config, auto_complete_config));

  TritonModel* raw_local_model = local_model.get();

  // Model initialization is optional... The TRITONBACKEND_Model
  // object is this TritonModel object. We must set the shared library
  // path to point to the backend directory in case the backend
  // library attempts to load additional shared libraries.
  if (backend->ModelInitFn() != nullptr) {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));
    RETURN_IF_ERROR(slib->SetLibraryDirectory(backend->Directory()));

    TRITONSERVER_Error* err = backend->ModelInitFn()(
        reinterpret_cast<TRITONBACKEND_Model*>(raw_local_model));

    RETURN_IF_ERROR(slib->ResetLibraryDirectory());
    RETURN_IF_TRITONSERVER_ERROR(err);
  }

  // Initialize the model for Triton core usage
  RETURN_IF_ERROR(local_model->Init(is_config_provided));

  bool device_blocking = false;
  if (local_model->backend_->ExecutionPolicy() ==
      TRITONBACKEND_EXECUTION_DEVICE_BLOCKING) {
    if (model_config.has_sequence_batching()) {
      LOG_INFO << "Overriding execution policy to "
                  "\"TRITONBACKEND_EXECUTION_BLOCKING\" for sequence model \""
               << model_config.name() << "\"";
    } else {
      device_blocking = true;
    }
  }

  // Create and initialize the model instances for this model.
  RETURN_IF_ERROR(TritonModelInstance::CreateInstances(
      raw_local_model, backend_cmdline_config_map, host_policy_map,
      model_config, device_blocking));

  RETURN_IF_ERROR(local_model->SetConfiguredScheduler());

  *model = std::move(local_model);
  return Status::Success;
}

Status
TritonModel::ResolveBackendConfigs(
    const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
    const std::string& backend_name,
    triton::common::BackendCmdlineConfig& config)
{
  const auto& global_itr = backend_cmdline_config_map.find(std::string());
  const auto& specific_itr = backend_cmdline_config_map.find(backend_name);
  if (specific_itr == backend_cmdline_config_map.end() &&
      global_itr != backend_cmdline_config_map.end()) {
    for (auto setting : global_itr->second) {
      config.push_back(setting);
    }
  } else if (
      specific_itr != backend_cmdline_config_map.end() &&
      global_itr == backend_cmdline_config_map.end()) {
    for (auto setting : specific_itr->second) {
      config.push_back(setting);
    }
  } else if (
      specific_itr != backend_cmdline_config_map.end() &&
      global_itr != backend_cmdline_config_map.end()) {
    triton::common::BackendCmdlineConfig global_backend_config =
        global_itr->second;
    triton::common::BackendCmdlineConfig specific_backend_config =
        specific_itr->second;

    std::sort(global_backend_config.begin(), global_backend_config.end());
    std::sort(specific_backend_config.begin(), specific_backend_config.end());

    size_t global_index = 0;
    size_t specific_index = 0;
    while (global_index < global_backend_config.size() &&
           specific_index < specific_backend_config.size()) {
      auto& current_global_setting = global_backend_config.at(global_index);
      auto& current_specific_setting =
          specific_backend_config.at(specific_index);
      if (current_specific_setting.first.compare(
              current_global_setting.first) == 0) {
        // specific setting overrides global setting
        config.push_back(current_specific_setting);
        ++global_index;
        ++specific_index;
      } else if (
          current_specific_setting.first.compare(
              current_global_setting.first) < 0) {
        config.push_back(current_specific_setting);
        ++specific_index;
      } else {
        config.push_back(current_global_setting);
        ++global_index;
      }
    }

    // add the rest of the global configs
    if (global_index < global_backend_config.size()) {
      auto& current_global_setting = global_backend_config.at(global_index);
      config.push_back(current_global_setting);
    }

    // add the rest of the specific settings
    if (specific_index < specific_backend_config.size()) {
      auto& current_specific_setting =
          specific_backend_config.at(specific_index);
      config.push_back(current_specific_setting);
    }
  }  // else empty config

  return Status::Success;
}
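The merge above keeps a backend-specific command-line setting whenever the same key also appears in the global configuration, and otherwise keeps settings from both lists. A standalone sketch of that override rule, using plain string pairs instead of the private TritonModel helper (the key and value names are hypothetical):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

using CmdlineConfig = std::vector<std::pair<std::string, std::string>>;

int main()
{
  CmdlineConfig global = {
      {"default-max-batch-size", "8"}, {"shm-region-prefix", "triton"}};
  CmdlineConfig specific = {{"default-max-batch-size", "16"}};

  // Specific settings win when a key appears in both lists.
  CmdlineConfig merged = specific;
  for (const auto& g : global) {
    bool overridden = false;
    for (const auto& s : specific) {
      if (s.first == g.first) {
        overridden = true;
        break;
      }
    }
    if (!overridden) {
      merged.push_back(g);
    }
  }

  for (const auto& kv : merged) {
    std::cout << kv.first << "=" << kv.second << "\n";
  }
  return 0;  // prints default-max-batch-size=16 and shm-region-prefix=triton
}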
const std::unordered_map<std::string, std::string> backend_config_defaults(
    {{"default-max-batch-size", "4"}});

Status
TritonModel::SetBackendConfigDefaults(
    triton::common::BackendCmdlineConfig& config)
{
  auto backend_config_defaults_copy = backend_config_defaults;

  for (auto& setting : config) {
    if (setting.first.compare("default-max-batch-size") == 0) {
      LOG_VERBOSE(1) << "Found overwritten default setting: " << setting.first
                     << "," << setting.second;
      backend_config_defaults_copy.erase(setting.first);
    }

    if (backend_config_defaults_copy.empty()) {
      break;
    }
  }

  // Anything left should be added to the config
  for (const auto& default_setting : backend_config_defaults_copy) {
    LOG_VERBOSE(1) << "Adding default backend config setting: "
                   << default_setting.first << "," << default_setting.second;
    config.push_back(
        std::make_pair(default_setting.first, default_setting.second));
  }

  return Status::Success;
}
Status
TritonModel::AddInstance(
    std::unique_ptr<TritonModelInstance>&& instance, const bool passive)
{
  if (passive) {
    passive_instances_.emplace_back(std::move(instance));
  } else {
    instances_.emplace_back(std::move(instance));
  }
  return Status::Success;
}
Status
TritonModel::UpdateModelConfig(
    const uint32_t config_version, TRITONSERVER_Message* updated_config_message)
{
  const char* buffer;
  size_t byte_size;
  RETURN_IF_TRITONSERVER_ERROR(TRITONSERVER_MessageSerializeToJson(
      updated_config_message, &buffer, &byte_size));
  inference::ModelConfig updated_config;
  RETURN_IF_ERROR(
      JsonToModelConfig({buffer, byte_size}, config_version, &updated_config));
  auto config = Config();
  config.set_max_batch_size(updated_config.max_batch_size());

  auto inputs_config = config.mutable_input();
  *inputs_config = updated_config.input();
  auto outputs_config = config.mutable_output();
  *outputs_config = updated_config.output();

  if (!config.scheduling_choice_case()) {
    if (updated_config.has_dynamic_batching()) {
      auto dynamic_batching_config = config.mutable_dynamic_batching();
      *dynamic_batching_config = updated_config.dynamic_batching();
    } else if (updated_config.has_sequence_batching()) {
      auto sequence_batching_config = config.mutable_sequence_batching();
      *sequence_batching_config = updated_config.sequence_batching();
    } else if (updated_config.has_ensemble_scheduling()) {
      auto ensemble_scheduling_config = config.mutable_ensemble_scheduling();
      *ensemble_scheduling_config = updated_config.ensemble_scheduling();
    }  // else do nothing
  } else if (
      config.scheduling_choice_case() !=
      updated_config.scheduling_choice_case()) {
    return Status(
        triton::common::Error::Code::INTERNAL,
        (std::string("Cannot update scheduling choice from ") +
         std::to_string(config.scheduling_choice_case()) +
         std::string(" to ") +
         std::to_string(config.scheduling_choice_case()) +
         std::string(" when auto-completing."))
            .c_str());
  }  // else do nothing

  // Need to normalize the model configuration for
  // populating missing fields.
  RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability_, &config));
  RETURN_IF_ERROR(SetModelConfig(config));

  return Status::Success;
}
Status
TritonModel::SetConfiguredScheduler()
{
  std::unique_ptr<Scheduler> scheduler;

  // Need to enforce equal shape batches (i.e. non-ragged batches) if
  // the model 1) allows one or more variable-size input tensors that
  // are not marked as 'allow_ragged_batch' or 2) has one or more
  // shape-tensor inputs. This is not needed if all input shapes are
  // non-variable and if there are no shape tensors... so we don't
  // enable it in that case for efficiency reasons.
  std::unordered_map<std::string, bool> enforce_equal_shape_tensors;
  for (const auto input : config_.input()) {
    if (input.is_shape_tensor()) {
      enforce_equal_shape_tensors.insert({input.name(), true});
    } else if (
        !input.allow_ragged_batch() &&
        (triton::common::GetElementCount(input) == -1)) {
      enforce_equal_shape_tensors.insert({input.name(), false});
    }
  }

  // If 'sequence_batching' is configured, then use the SequenceBatchScheduler,
  // otherwise use the default DynamicBatchScheduler.
  if (config_.has_sequence_batching()) {
    // Sequence batcher
    RETURN_IF_ERROR(SequenceBatchScheduler::Create(
        this, enforce_equal_shape_tensors, &scheduler));
  } else if (config_.has_dynamic_batching()) {
    // Dynamic batcher
    RETURN_IF_ERROR(DynamicBatchScheduler::Create(
        this, nullptr, 0 /*nice*/, true /* dynamic_batching_enabled */,
        config_.max_batch_size(), enforce_equal_shape_tensors,
        config_.dynamic_batching(),
        config_.response_cache().enable() /* response_cache_enable */,
        &scheduler));
  } else {
    // Default scheduler. Use dynamic batch scheduler (with batching
    // disabled) as the default scheduler.
    RETURN_IF_ERROR(DynamicBatchScheduler::Create(
        this, nullptr, 0 /*nice*/, false /* dynamic_batching_enabled */,
        1 /* max_batch_size */,
        std::unordered_map<std::string, bool>() /* enforce_equal_shape_tensors */,
        false /* preserve_ordering */,
        config_.response_cache().enable() /* response_cache_enable */,
        std::set<int32_t>() /* preferred_batch_sizes */,
        0 /* max_queue_delay_microseconds */, &scheduler));
  }

  return SetScheduler(std::move(scheduler));
}
Status
TritonModel::Initialize()
{
  for (const auto& instance : instances_) {
    RETURN_IF_ERROR(instance->Initialize());
  }

  return Status::Success;
}

Status
TritonModel::WarmUp()
{
  for (const auto& instance : instances_) {
    RETURN_IF_ERROR(instance->WarmUp());
  }

  return Status::Success;
}
TritonModel::TritonModel(
    InferenceServer* server,
    const std::shared_ptr<LocalizedPath>& localized_model_dir,
    const std::shared_ptr<TritonBackend>& backend,
    const double min_compute_capability, const int64_t version,
    const inference::ModelConfig& config, const bool auto_complete_config)
    : Model(
          min_compute_capability, localized_model_dir->Path(), version, config),
      server_(server), min_compute_capability_(min_compute_capability),
      auto_complete_config_(auto_complete_config),
      localized_model_dir_(localized_model_dir), backend_(backend),
      state_(nullptr)
{
}

TritonModel::~TritonModel()
{
  // Explicitly delete/finalize all model instances before finalizing
  // the model itself.
  instances_.clear();
  passive_instances_.clear();

  // Unregister itself from the rate limiter. Note this should happen
  // after all instances are destructed. Destructing instances ensures
  // there are no instance threads waiting on rate limiter for
  // receiving their payloads.
  server_->GetRateLimiter()->UnregisterModel(this);

  // Model finalization is optional... The TRITONBACKEND_Model
  // object is this TritonModel object.
  if (backend_->ModelFiniFn() != nullptr) {
    LOG_TRITONSERVER_ERROR(
        backend_->ModelFiniFn()(reinterpret_cast<TRITONBACKEND_Model*>(this)),
        "failed finalizing model");
  }
}
extern "C" {

//
// TRITONBACKEND_Model
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelName(TRITONBACKEND_Model* model, const char** name)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *name = tm->Name().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelVersion(TRITONBACKEND_Model* model, uint64_t* version)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *version = tm->Version();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelRepository(
    TRITONBACKEND_Model* model, TRITONBACKEND_ArtifactType* artifact_type,
    const char** location)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *artifact_type = TRITONBACKEND_ARTIFACT_FILESYSTEM;
  *location = tm->LocalizedModelPath().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message** model_config)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);

  std::string model_config_json;
  Status status =
      ModelConfigToJson(tm->Config(), config_version, &model_config_json);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *model_config = reinterpret_cast<TRITONSERVER_Message*>(
      new TritonServerMessage(std::move(model_config_json)));
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelAutoCompleteConfig(
    TRITONBACKEND_Model* model, bool* auto_complete_config)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *auto_complete_config = tm->AutoCompleteConfig();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelSetConfig(
    TRITONBACKEND_Model* model, const uint32_t config_version,
    TRITONSERVER_Message* model_config)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  Status status = tm->UpdateModelConfig(config_version, model_config);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelServer(
    TRITONBACKEND_Model* model, TRITONSERVER_Server** server)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *server = reinterpret_cast<TRITONSERVER_Server*>(tm->Server());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelBackend(
    TRITONBACKEND_Model* model, TRITONBACKEND_Backend** backend)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *backend = reinterpret_cast<TRITONBACKEND_Backend*>(tm->Backend().get());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelState(TRITONBACKEND_Model* model, void** state)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  *state = tm->State();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelSetState(TRITONBACKEND_Model* model, void* state)
{
  TritonModel* tm = reinterpret_cast<TritonModel*>(model);
  tm->SetState(state);
  return nullptr;  // success
}
///
/// TRITONBACKEND_Request
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestId(TRITONBACKEND_Request* request, const char** id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *id = tr->Id().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestCorrelationId(TRITONBACKEND_Request* request, uint64_t* id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const InferenceRequest::SequenceId& correlation_id = tr->CorrelationId();
  if (correlation_id.Type() != InferenceRequest::SequenceId::DataType::UINT64) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "correlation ID in request is not an unsigned int")
            .c_str());
  }
  *id = correlation_id.UnsignedIntValue();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestFlags(TRITONBACKEND_Request* request, uint32_t* flags)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *flags = tr->Flags();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestCorrelationIdString(
    TRITONBACKEND_Request* request, const char** id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const InferenceRequest::SequenceId& correlation_id = tr->CorrelationId();
  if (correlation_id.Type() != InferenceRequest::SequenceId::DataType::STRING) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "correlation ID in request is not a string")
            .c_str());
  }
  *id = correlation_id.StringValue().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInputCount(TRITONBACKEND_Request* request, uint32_t* count)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *count = tr->ImmutableInputs().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** input_name)
{
  *input_name = nullptr;

  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& inputs = tr->ImmutableInputs();
  if (index >= inputs.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "out of bounds index " + std::to_string(index) +
         ": request has " + std::to_string(inputs.size()) + " inputs")
            .c_str());
  }

  // The request inputs are not allowed to change once the request
  // makes it to the backend, so it is ok to just iterate through the
  // map. This linear search is the best we can do given the need for
  // the inputs to be in a map and given the typical small number of
  // inputs is better than having every request maintain the inputs as
  // both map and vector.
  uint32_t cnt = 0;
  for (const auto& pr : inputs) {
    if (cnt++ == index) {
      InferenceRequest::Input* in = pr.second;
      *input_name = in->Name().c_str();
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInput(
    TRITONBACKEND_Request* request, const char* name,
    TRITONBACKEND_Input** input)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& inputs = tr->ImmutableInputs();
  const auto& itr = inputs.find(name);
  if (itr == inputs.end()) {
    *input = nullptr;
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "unknown request input name " + name).c_str());
  }

  InferenceRequest::Input* in = itr->second;
  *input = reinterpret_cast<TRITONBACKEND_Input*>(in);

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestInputByIndex(
    TRITONBACKEND_Request* request, const uint32_t index,
    TRITONBACKEND_Input** input)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& inputs = tr->ImmutableInputs();
  if (index >= inputs.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "out of bounds index " + std::to_string(index) +
         ": request has " + std::to_string(inputs.size()) + " inputs")
            .c_str());
  }

  // The request inputs are not allowed to change once the request
  // makes it to the backend, so it is ok to just iterate through the
  // map. This linear search is the best we can do given the need for
  // the inputs to be in a map and given the typical small number of
  // inputs is better than having every request maintain the inputs as
  // both map and vector.
  uint32_t cnt = 0;
  for (const auto& pr : inputs) {
    if (cnt++ == index) {
      InferenceRequest::Input* in = pr.second;
      *input = reinterpret_cast<TRITONBACKEND_Input*>(in);
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputCount(
    TRITONBACKEND_Request* request, uint32_t* count)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  *count = tr->ImmutableRequestedOutputs().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputName(
    TRITONBACKEND_Request* request, const uint32_t index,
    const char** output_name)
{
  *output_name = nullptr;

  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  const auto& routputs = tr->ImmutableRequestedOutputs();
  if (index >= routputs.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (tr->LogRequest() + "out of bounds index " + std::to_string(index) +
         ": request has " + std::to_string(routputs.size()) +
         " requested outputs")
            .c_str());
  }

  // The requested outputs are not allowed to change once the request
  // makes it to the backend, so it is ok to just iterate through the
  // set. This linear search is the best we can do given the requested
  // outputs being in a set and given the typical small number of
  // requested outputs it should not be a performance issue.
  uint32_t cnt = 0;
  for (const auto& rout : routputs) {
    if (cnt++ == index) {
      *output_name = rout.c_str();
      break;
    }
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestOutputBufferProperties(
    TRITONBACKEND_Request* request, const char* name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  auto status =
      tr->OutputBufferProperties(name, byte_size, memory_type, memory_type_id);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_RequestRelease(
    TRITONBACKEND_Request* request, uint32_t release_flags)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  std::unique_ptr<InferenceRequest> ur(tr);
  InferenceRequest::Release(std::move(ur), release_flags);
  return nullptr;  // success
}
///
/// TRITONBACKEND_State
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateUpdate(TRITONBACKEND_State* state)
{
  SequenceState* ts = reinterpret_cast<SequenceState*>(state);
  auto status = ts->Update();
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateNew(
    TRITONBACKEND_State** state, TRITONBACKEND_Request* request,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  SequenceState* lstate;
  std::vector<int64_t> lshape(shape, shape + dims_count);
  auto& sequence_state = tr->GetSequenceStates();

  if (sequence_state == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("unable to add state '") + name +
         "'. State configuration is missing for model '" + tr->ModelName() +
         "'.")
            .c_str());
  }

  Status status = sequence_state->OutputState(
      name, TritonToDataType(datatype), lshape, &lstate);
  if (!status.IsOk()) {
    *state = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *state = reinterpret_cast<TRITONBACKEND_State*>(lstate);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateBuffer(
    TRITONBACKEND_State* state, void** buffer, const uint64_t buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  SequenceState* to = reinterpret_cast<SequenceState*>(state);
  Status status = Status::Success;

  // If the buffer size exactly matches the buffer available, reuse the
  // currently allocated buffer.
  if (to->Data()->TotalByteSize() == buffer_byte_size) {
    const std::shared_ptr<AllocatedMemory>& memory =
        reinterpret_cast<const std::shared_ptr<AllocatedMemory>&>(to->Data());
    TRITONSERVER_MemoryType current_memory_type;
    int64_t current_memory_type_id;
    void* lbuffer =
        memory->MutableBuffer(&current_memory_type, &current_memory_type_id);

    // If the requested memory type doesn't match the current buffer, allocate a
    // new buffer with the requested memory type and memory type id.
    if (current_memory_type == *memory_type &&
        current_memory_type_id == *memory_type_id) {
      *buffer = lbuffer;
    } else {
      std::shared_ptr<AllocatedMemory> memory = std::make_shared<AllocatedMemory>(
          buffer_byte_size, *memory_type, *memory_type_id);
      *buffer = memory->MutableBuffer(memory_type, memory_type_id);
      to->RemoveAllData();
      status = to->SetData(memory);
    }
  } else {
    std::shared_ptr<AllocatedMemory> memory = std::make_shared<AllocatedMemory>(
        buffer_byte_size, *memory_type, *memory_type_id);
    *buffer = memory->MutableBuffer(memory_type, memory_type_id);
    to->RemoveAllData();
    status = to->SetData(memory);
  }

  if (!status.IsOk()) {
    *buffer = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_StateBufferAttributes(
    TRITONBACKEND_State* state,
    TRITONSERVER_BufferAttributes** buffer_attributes)
{
  SequenceState* to = reinterpret_cast<SequenceState*>(state);
  to->Data()->BufferAt(
      0, reinterpret_cast<BufferAttributes**>(buffer_attributes));
  return nullptr;  // success
}

//
// TRITONBACKEND_ResponseFactory
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactoryNew(
    TRITONBACKEND_ResponseFactory** factory, TRITONBACKEND_Request* request)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      new std::shared_ptr<InferenceResponseFactory>(tr->ResponseFactory());
  *factory = reinterpret_cast<TRITONBACKEND_ResponseFactory*>(response_factory);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactoryDelete(TRITONBACKEND_ResponseFactory* factory)
{
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      reinterpret_cast<std::shared_ptr<InferenceResponseFactory>*>(factory);
  delete response_factory;
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseFactorySendFlags(
    TRITONBACKEND_ResponseFactory* factory, const uint32_t send_flags)
{
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      reinterpret_cast<std::shared_ptr<InferenceResponseFactory>*>(factory);
  Status status = (*response_factory)->SendFlags(send_flags);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}
///
/// TRITONBACKEND_Response
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseNew(
    TRITONBACKEND_Response** response, TRITONBACKEND_Request* request)
{
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);

  std::unique_ptr<InferenceResponse> tresp;
  Status status = tr->ResponseFactory()->CreateResponse(&tresp);
  if (!status.IsOk()) {
    *response = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *response = reinterpret_cast<TRITONBACKEND_Response*>(tresp.release());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseNewFromFactory(
    TRITONBACKEND_Response** response, TRITONBACKEND_ResponseFactory* factory)
{
  std::shared_ptr<InferenceResponseFactory>* response_factory =
      reinterpret_cast<std::shared_ptr<InferenceResponseFactory>*>(factory);

  std::unique_ptr<InferenceResponse> tr;
  Status status = (*response_factory)->CreateResponse(&tr);
  if (!status.IsOk()) {
    *response = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *response = reinterpret_cast<TRITONBACKEND_Response*>(tr.release());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseDelete(TRITONBACKEND_Response* response)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  delete tr;
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetStringParameter(
    TRITONBACKEND_Response* response, const char* name, const char* value)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  Status status = tr->AddParameter(name, value);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetIntParameter(
    TRITONBACKEND_Response* response, const char* name, const int64_t value)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  Status status = tr->AddParameter(name, value);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSetBoolParameter(
    TRITONBACKEND_Response* response, const char* name, const bool value)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  Status status = tr->AddParameter(name, value);
  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseOutput(
    TRITONBACKEND_Response* response, TRITONBACKEND_Output** output,
    const char* name, const TRITONSERVER_DataType datatype,
    const int64_t* shape, const uint32_t dims_count)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);
  std::vector<int64_t> lshape(shape, shape + dims_count);
  InferenceResponse::Output* loutput;
  Status status = tr->AddOutput(
      name, TritonToDataType(datatype), std::move(lshape), &loutput);
  if (!status.IsOk()) {
    *output = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  *output = reinterpret_cast<TRITONBACKEND_Output*>(loutput);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ResponseSend(
    TRITONBACKEND_Response* response, const uint32_t send_flags,
    TRITONSERVER_Error* error)
{
  InferenceResponse* tr = reinterpret_cast<InferenceResponse*>(response);

  Status status;

  std::unique_ptr<InferenceResponse> utr(tr);
  if (error == nullptr) {
    status = InferenceResponse::Send(std::move(utr), send_flags);
  } else {
    status = InferenceResponse::SendWithStatus(
        std::move(utr), send_flags,
        Status(
            TritonCodeToStatusCode(TRITONSERVER_ErrorCode(error)),
            TRITONSERVER_ErrorMessage(error)));
  }

  if (!status.IsOk()) {
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }

  return nullptr;  // success
}
///
/// TRITONBACKEND_Input
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputProperties(
    TRITONBACKEND_Input* input, const char** name,
    TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  if (name != nullptr) {
    *name = ti->Name().c_str();
  }
  if (datatype != nullptr) {
    *datatype = DataTypeToTriton(ti->DType());
  }
  if (shape != nullptr) {
    *shape = ti->ShapeWithBatchDim().data();
  }
  if (dims_count != nullptr) {
    *dims_count = ti->ShapeWithBatchDim().size();
  }
  if (byte_size != nullptr) {
    *byte_size = ti->Data()->TotalByteSize();
  }
  if (buffer_count != nullptr) {
    *buffer_count = ti->DataBufferCount();
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputPropertiesForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape,
    uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  if (name != nullptr) {
    *name = ti->Name().c_str();
  }
  if (datatype != nullptr) {
    *datatype = DataTypeToTriton(ti->DType());
  }
  if (shape != nullptr) {
    *shape = ti->ShapeWithBatchDim().data();
  }
  if (dims_count != nullptr) {
    *dims_count = ti->ShapeWithBatchDim().size();
  }
  if (host_policy_name != nullptr) {
    if (byte_size != nullptr) {
      *byte_size = ti->Data(host_policy_name)->TotalByteSize();
    }
    if (buffer_count != nullptr) {
      *buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
    }
  } else {
    if (byte_size != nullptr) {
      *byte_size = ti->Data()->TotalByteSize();
    }
    if (buffer_count != nullptr) {
      *buffer_count = ti->DataBufferCount();
    }
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBuffer(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    uint64_t* buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  Status status = ti->DataBuffer(
      index, buffer, buffer_byte_size, memory_type, memory_type_id);
  if (!status.IsOk()) {
    *buffer = nullptr;
    *buffer_byte_size = 0;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBufferAttributes(
    TRITONBACKEND_Input* input, const uint32_t index, const void** buffer,
    TRITONSERVER_BufferAttributes** buffer_attributes)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);
  Status status = ti->DataBufferAttributes(
      index, buffer, reinterpret_cast<BufferAttributes**>(buffer_attributes));
  if (!status.IsOk()) {
    *buffer = nullptr;
    *buffer_attributes = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_InputBufferForHostPolicy(
    TRITONBACKEND_Input* input, const char* host_policy_name,
    const uint32_t index, const void** buffer, uint64_t* buffer_byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  InferenceRequest::Input* ti =
      reinterpret_cast<InferenceRequest::Input*>(input);

  Status status =
      (host_policy_name == nullptr)
          ? ti->DataBuffer(
                index, buffer, buffer_byte_size, memory_type, memory_type_id)
          : ti->DataBufferForHostPolicy(
                index, buffer, buffer_byte_size, memory_type, memory_type_id,
                host_policy_name);
  if (!status.IsOk()) {
    *buffer = nullptr;
    *buffer_byte_size = 0;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}
///
/// TRITONBACKEND_Output
///
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_OutputBuffer(
    TRITONBACKEND_Output* output, void** buffer,
    const uint64_t buffer_byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id)
{
  InferenceResponse::Output* to =
      reinterpret_cast<InferenceResponse::Output*>(output);
  Status status = to->AllocateDataBuffer(
      buffer, buffer_byte_size, memory_type, memory_type_id);
  if (!status.IsOk()) {
    *buffer = nullptr;
    return TRITONSERVER_ErrorNew(
        StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str());
  }
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_OutputBufferAttributes(
    TRITONBACKEND_Output* output,
    TRITONSERVER_BufferAttributes** buffer_attributes)
{
  InferenceResponse::Output* to =
      reinterpret_cast<InferenceResponse::Output*>(output);
  *buffer_attributes = reinterpret_cast<TRITONSERVER_BufferAttributes*>(
      to->GetBufferAttributes());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup(
    TRITONBACKEND_BackendAttribute* backend_attributes,
    const TRITONSERVER_InstanceGroupKind kind, const uint64_t count,
    const uint64_t* device_ids, const uint64_t id_count)
{
  auto ba = reinterpret_cast<TritonBackend::Attribute*>(backend_attributes);
  ba->preferred_groups_.emplace_back();
  auto& pg = ba->preferred_groups_.back();
  switch (kind) {
    case TRITONSERVER_INSTANCEGROUPKIND_AUTO:
      pg.set_kind(inference::ModelInstanceGroup::KIND_AUTO);
      break;
    case TRITONSERVER_INSTANCEGROUPKIND_CPU:
      pg.set_kind(inference::ModelInstanceGroup::KIND_CPU);
      break;
    case TRITONSERVER_INSTANCEGROUPKIND_GPU:
      pg.set_kind(inference::ModelInstanceGroup::KIND_GPU);
      break;
    case TRITONSERVER_INSTANCEGROUPKIND_MODEL:
      pg.set_kind(inference::ModelInstanceGroup::KIND_MODEL);
      break;
  }
  pg.set_count(count);
  if (device_ids != nullptr) {
    for (size_t i = 0; i < id_count; ++i) {
      pg.add_gpus(device_ids[i]);
    }
  }
  return nullptr;
}

}  // extern C

}}  // namespace triton::core
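To show how a backend consumes the TRITONBACKEND_Model* C API implemented above, here is a minimal sketch of a model-initialization hook that reads the model name and its serialized configuration. It is purely illustrative: error handling is reduced to early returns, and no real backend logic is implied.

#include <iostream>
#include <string>
#include "triton/core/tritonbackend.h"

extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  const char* name = nullptr;
  TRITONSERVER_Error* err = TRITONBACKEND_ModelName(model, &name);
  if (err != nullptr) {
    return err;
  }

  // Fetch the model configuration as a JSON message (config version 1).
  TRITONSERVER_Message* config_message = nullptr;
  err = TRITONBACKEND_ModelConfig(model, 1 /* config_version */, &config_message);
  if (err != nullptr) {
    return err;
  }

  const char* buffer = nullptr;
  size_t byte_size = 0;
  err = TRITONSERVER_MessageSerializeToJson(config_message, &buffer, &byte_size);
  if (err == nullptr) {
    std::cout << "initializing '" << name << "' with config: "
              << std::string(buffer, byte_size) << std::endl;
  }
  TRITONSERVER_MessageDelete(config_message);
  return err;  // nullptr on success
}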
3rdparty/core-r22.12/src/backend_model.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
#include <string>
#include "backend_manager.h"
#include "filesystem.h"
#include "infer_request.h"
#include "model.h"
#include "model_config.pb.h"
#include "status.h"
namespace triton { namespace core {

class InferenceServer;
class TritonModelInstance;

//
// Represents a model.
//
// Inheriting from Model to implement backend APIs
//
class TritonModel : public Model {
 public:
  static Status Create(
      InferenceServer* server, const std::string& model_path,
      const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
      const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
      const std::string& model_name, const int64_t version,
      inference::ModelConfig model_config, const bool is_config_provided,
      std::unique_ptr<TritonModel>* model);
  ~TritonModel();

  const std::string& LocalizedModelPath() const
  {
    return localized_model_dir_->Path();
  }

  InferenceServer* Server() { return server_; }
  bool AutoCompleteConfig() const { return auto_complete_config_; }

  Status UpdateModelConfig(
      const uint32_t config_version,
      TRITONSERVER_Message* updated_config_message);

  const std::shared_ptr<TritonBackend>& Backend() const { return backend_; }

  const std::vector<std::unique_ptr<TritonModelInstance>>& Instances() const
  {
    return instances_;
  }

  void* State() { return state_; }
  void SetState(void* state) { state_ = state; }

  Status AddInstance(
      std::unique_ptr<TritonModelInstance>&& instance, const bool passive);

 private:
  DISALLOW_COPY_AND_ASSIGN(TritonModel);

  TritonModel(
      InferenceServer* server,
      const std::shared_ptr<LocalizedPath>& localized_model_dir,
      const std::shared_ptr<TritonBackend>& backend,
      const double min_compute_capability, const int64_t version,
      const inference::ModelConfig& config, const bool auto_complete_config);

  // Set the scheduler based on the model configuration. The scheduler
  // can only be set once for a backend.
  Status SetConfiguredScheduler();

  // Merges the global backend configs with the specific
  // backend configs.
  static Status ResolveBackendConfigs(
      const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
      const std::string& backend_name,
      triton::common::BackendCmdlineConfig& config);

  // Sets defaults for some backend configurations when none are specified on
  // the command line.
  static Status SetBackendConfigDefaults(
      triton::common::BackendCmdlineConfig& config);

  Status Initialize();
  Status WarmUp();

  // The server object that owns this model. The model holds this as a
  // raw pointer because the lifetime of the server is guaranteed to
  // be longer than the lifetime of a model owned by the server.
  InferenceServer* server_;

  // The minimum supported compute capability on device.
  const double min_compute_capability_;

  // Whether the backend should attempt to auto-complete the model config.
  const bool auto_complete_config_;

  // The localized repo directory holding the model. If localization
  // required creation of a temporary local copy then that copy will
  // persist as long as this object is retained by this model.
  std::shared_ptr<LocalizedPath> localized_model_dir_;

  // Backend used by this model.
  std::shared_ptr<TritonBackend> backend_;

  // The model instances for this model.
  std::vector<std::unique_ptr<TritonModelInstance>> instances_;
  std::vector<std::unique_ptr<TritonModelInstance>> passive_instances_;

  // Opaque state associated with this model.
  void* state_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/backend_model_instance.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "backend_model_instance.h"
#ifndef _WIN32
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif
#include "backend_config.h"
#include "backend_model.h"
#include "cuda_utils.h"
#include "metrics.h"
#include "model_config.pb.h"
#include "numa_utils.h"
#include "server.h"
#include "shared_library.h"
#include "triton/common/logging.h"
#include "triton/common/nvtx.h"
#include "tritonserver_apis.h"
// For unknown reason, windows will not export the TRITONBACKEND_*
// functions declared with dllexport in tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions
// in this file with dllexport as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif
namespace triton { namespace core {

namespace {

// Utilities for warmup feature
TRITONSERVER_Error*
WarmupResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  *buffer = malloc(byte_size);
  if (*buffer != nullptr) {
    *actual_memory_type = TRITONSERVER_MEMORY_CPU;
    *actual_memory_type_id = 0;
    return nullptr;
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL,
      "failed to allocate output buffer for warmup.");
}

TRITONSERVER_Error*
WarmupResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  free(buffer);
  return nullptr;
}

ResponseAllocator warmup_allocator = ResponseAllocator(
    WarmupResponseAlloc, WarmupResponseRelease, nullptr /* start_fn */);

void
WarmupResponseComplete(
    TRITONSERVER_InferenceResponse* iresponse, const uint32_t flags,
    void* userp)
{
  auto res_pair = reinterpret_cast<
      std::pair<std::promise<void>, std::vector<std::string>*>*>(userp);
  if (iresponse != nullptr) {
    auto err = TRITONSERVER_InferenceResponseError(iresponse);
    if (err != nullptr) {
      // The error vector is shared by all requests in the batch for now
      static std::mutex res_mtx;
      {
        std::lock_guard<std::mutex> lk(res_mtx);
        res_pair->second->emplace_back(TRITONSERVER_ErrorMessage(err));
      }
      TRITONSERVER_ErrorDelete(err);
    }
    // Just delete the response, warmup doesn't check for correctness
    LOG_TRITONSERVER_ERROR(
        TRITONSERVER_InferenceResponseDelete(iresponse),
        "deleting warmup response");
  }
  // Last response
  if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0) {
    res_pair->first.set_value();
  }
}

void
WarmupRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  if ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0) {
    // Don't need to release request here, it is managed in WarmupData
    if (userp != nullptr) {
      auto warmup_promise = reinterpret_cast<std::promise<void>*>(userp);
      warmup_promise->set_value();
    }
  }
}

}  // namespace
TritonModelInstance::TritonModelInstance(
    TritonModel* model, const std::string& name, const size_t index,
    const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
    const std::vector<std::string>& profile_names, const bool passive,
    const triton::common::HostPolicyCmdlineConfig& host_policy,
    const TritonServerMessage& host_policy_message,
    const std::vector<SecondaryDevice>& secondary_devices)
    : model_(model), name_(name), index_(index), kind_(kind),
      device_id_(device_id), host_policy_(host_policy),
      host_policy_message_(host_policy_message), profile_names_(profile_names),
      passive_(passive), secondary_devices_(secondary_devices),
      state_(nullptr)
{
#ifdef TRITON_ENABLE_METRICS
  if (Metrics::Enabled()) {
    // Use an ID in the metric only for GPU instances. Otherwise use
    // METRIC_REPORTER_ID_CPU to indicate no device should be reported in the
    // metric.
    const int id = (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU)
                       ? device_id_
                       : METRIC_REPORTER_ID_CPU;
    MetricModelReporter::Create(
        model_->Name(), model_->Version(), id, model_->Config().metric_tags(),
        &reporter_);
  }
#endif  // TRITON_ENABLE_METRICS
}

TritonModelInstance::~TritonModelInstance()
{
  if (triton_backend_thread_.get() != nullptr) {
    triton_backend_thread_->StopBackendThread();
  }

  // Model finalization is optional...
  if (model_->Backend()->ModelInstanceFiniFn() != nullptr) {
    LOG_TRITONSERVER_ERROR(
        model_->Backend()->ModelInstanceFiniFn()(
            reinterpret_cast<TRITONBACKEND_ModelInstance*>(this)),
        "failed finalizing model instance");
  }
}
Status
TritonModelInstance::CreateInstances(
    TritonModel* model,
    const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
    const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
    const inference::ModelConfig& model_config, const bool device_blocking)
{
  static triton::common::HostPolicyCmdlineConfig empty_host_policy;

  // This structure is used to allocate a TritonBackendThread to instances on
  // the same device for the device-blocking execution policy.
  std::map<uint32_t, std::shared_ptr<TritonBackendThread>>
      device_to_thread_map;

  for (const auto& group : model_config.instance_group()) {
    std::vector<std::string> profile_names;
    for (const auto& profile_name : group.profile()) {
      profile_names.push_back(profile_name);
    }
    std::vector<SecondaryDevice> secondary_devices;
    for (const auto& secondary_device : group.secondary_devices()) {
      secondary_devices.emplace_back(
          inference::
              ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_Name(
                  secondary_device.kind()),
          secondary_device.device_id());
    }
    for (int32_t c = 0; c < group.count(); ++c) {
      std::string instance_name{
          group.count() > 1 ? group.name() + "_" + std::to_string(c)
                            : group.name()};
      const bool passive = group.passive();
      std::vector<std::tuple<
          std::string, TRITONSERVER_InstanceGroupKind, int32_t,
          const inference::ModelRateLimiter*>>
          instance_setting;
      if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) {
        instance_setting.emplace_back(
            group.host_policy().empty() ? "cpu" : group.host_policy(),
            TRITONSERVER_INSTANCEGROUPKIND_CPU, 0 /* device_id */,
            &group.rate_limiter());
      } else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) {
        for (const int32_t device_id : group.gpus()) {
          instance_setting.emplace_back(
              group.host_policy().empty()
                  ? ("gpu_" + std::to_string(device_id))
                  : group.host_policy(),
              TRITONSERVER_INSTANCEGROUPKIND_GPU, device_id,
              &group.rate_limiter());
        }
      } else if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) {
        instance_setting.emplace_back(
            group.host_policy().empty() ? "model" : group.host_policy(),
            TRITONSERVER_INSTANCEGROUPKIND_MODEL, 0 /* device_id */,
            &group.rate_limiter());
      } else {
        return Status(
            Status::Code::INVALID_ARG,
            std::string("instance_group kind ") +
                ModelInstanceGroup_Kind_Name(group.kind()) + " not supported");
      }
      for (const auto is : instance_setting) {
        const auto& kind = std::get<1>(is);
        const auto& id = std::get<2>(is);
        const std::string& policy_name = std::get<0>(is);
        const triton::common::HostPolicyCmdlineConfig* host_policy;
        const auto policy_it = host_policy_map.find(policy_name);
        if (policy_it != host_policy_map.end()) {
          host_policy = &policy_it->second;
        } else {
          host_policy = &empty_host_policy;
        }

        RETURN_IF_ERROR(SetNumaConfigOnThread(*host_policy));
        auto err = CreateInstance(
            model, instance_name, c, kind, id, profile_names, passive,
            policy_name, *host_policy, *(std::get<3>(is)), device_blocking,
            &device_to_thread_map, secondary_devices);
        RETURN_IF_ERROR(ResetNumaMemoryPolicy());
        RETURN_IF_ERROR(err);

        // When deploying on GPU, we want to make sure the GPU memory usage
        // is within the allowed range; otherwise, stop the creation to ensure
        // there is sufficient GPU memory for other use.
        // We check the usage after loading the instance to better enforce
        // the limit. If we checked before loading, we might create an
        // instance that occupies the rest of the available memory, which
        // works against the purpose of the limit.
        if (kind == TRITONSERVER_INSTANCEGROUPKIND_GPU) {
          size_t free, total;
          double memory_limit;
          RETURN_IF_ERROR(GetDeviceMemoryInfo(id, &free, &total));
          RETURN_IF_ERROR(BackendConfigurationModelLoadGpuFraction(
              backend_cmdline_config_map, id, &memory_limit));
          const size_t allow = total * memory_limit;
          const size_t used = total - free;
          if (used > allow) {
            return Status(
                Status::Code::UNAVAILABLE,
                std::string("can not create model '") + instance_name +
                    "': memory limit set for " +
                    TRITONSERVER_InstanceGroupKindString(kind) + " " +
                    std::to_string(id) +
                    " has exceeded, model loading is rejected.");
          }
        }
      }
    }
  }

  return Status::Success;
}
Status
TritonModelInstance::CreateInstance(
    TritonModel* model, const std::string& name, const size_t index,
    const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
    const std::vector<std::string>& profile_names, const bool passive,
    const std::string& host_policy_name,
    const triton::common::HostPolicyCmdlineConfig& host_policy,
    const inference::ModelRateLimiter& rate_limiter_config,
    const bool device_blocking,
    std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
        device_to_thread_map,
    const std::vector<SecondaryDevice>& secondary_devices)
{
  // Create the JSON representation of the backend configuration.
  triton::common::TritonJson::Value host_policy_json(
      triton::common::TritonJson::ValueType::OBJECT);
  triton::common::TritonJson::Value policy_setting_json(
      host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
  for (const auto& pr : host_policy) {
    RETURN_IF_ERROR(
        policy_setting_json.AddString(pr.first.c_str(), pr.second));
  }

  RETURN_IF_ERROR(host_policy_json.Add(
      host_policy_name.c_str(), std::move(policy_setting_json)));
  TritonServerMessage host_policy_message(host_policy_json);

  std::unique_ptr<TritonModelInstance> local_instance(new TritonModelInstance(
      model, name, index, kind, device_id, profile_names, passive, host_policy,
      host_policy_message, secondary_devices));

  TRITONBACKEND_ModelInstance* triton_instance =
      reinterpret_cast<TRITONBACKEND_ModelInstance*>(local_instance.get());

  // Instance initialization is optional... We must set the shared
  // library path to point to the backend directory in case the
  // backend library attempts to load additional shared libraries.
  if (model->Backend()->ModelInstanceInitFn() != nullptr) {
    std::unique_ptr<SharedLibrary> slib;
    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));
    RETURN_IF_ERROR(slib->SetLibraryDirectory(model->Backend()->Directory()));

    TRITONSERVER_Error* err =
        model->Backend()->ModelInstanceInitFn()(triton_instance);

    RETURN_IF_ERROR(slib->ResetLibraryDirectory());
    RETURN_IF_TRITONSERVER_ERROR(err);
  }

  if (!passive) {
    RETURN_IF_ERROR(local_instance->GenerateWarmupData());
    RETURN_IF_ERROR(model->Server()->GetRateLimiter()->RegisterModelInstance(
        local_instance.get(), rate_limiter_config));
    RETURN_IF_ERROR(local_instance->SetBackendThread(
        kind, device_id, device_blocking, device_to_thread_map));
  }

  RETURN_IF_ERROR(model->AddInstance(std::move(local_instance), passive));

  return Status::Success;
}
Status
TritonModelInstance::SetBackendThread(
    const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
    const bool device_blocking,
    std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
        device_to_thread_map)
{
  if (device_blocking && (kind == TRITONSERVER_INSTANCEGROUPKIND_GPU)) {
    auto thread_it = device_to_thread_map->find(device_id);
    if (thread_it != device_to_thread_map->end()) {
      LOG_VERBOSE(1) << "Using already started backend thread for " << Name()
                     << " on device " << device_id;
      triton_backend_thread_ = thread_it->second;
    }
  }
  if (triton_backend_thread_.get() == nullptr) {
    std::unique_ptr<TritonBackendThread> local_backend_thread;
    RETURN_IF_ERROR(TritonBackendThread::CreateBackendThread(
        Name(), this, 0 /* nice */, device_id, &local_backend_thread));
    triton_backend_thread_ = std::move(local_backend_thread);
    device_to_thread_map->insert({device_id, triton_backend_thread_});
  } else {
    triton_backend_thread_->AddModelInstance(this);
  }
  RETURN_IF_ERROR(triton_backend_thread_->InitAndWarmUpModelInstance(this));

  return Status::Success;
}
Status
TritonModelInstance::GenerateWarmupData()
{
  warmup_samples_.clear();
  for (const auto& warmup_setting : model_->Config().model_warmup()) {
    if (warmup_setting.batch_size() == 0) {
      LOG_VERBOSE(1) << "Skipping batch 0 warmup sample '"
                     << warmup_setting.name() << "'";
      continue;
    }
    LOG_VERBOSE(1) << "Generating warmup sample data for '"
                   << warmup_setting.name() << "'";

    // Two passes. First pass to get max byte size for synthetic
    // data. Second pass to add original inputs and override inputs
    // for control inputs.
    int64_t max_zero_byte_size = 0;
    int64_t max_random_byte_size = 0;
    for (const auto& input_meta : warmup_setting.inputs()) {
      auto element_count =
          triton::common::GetElementCount(input_meta.second.dims());
      if (element_count == -1) {
        return Status(
            Status::Code::INVALID_ARG,
            "warmup setting expects all variable-size dimensions are "
            "specified for input '" +
                input_meta.first + "'");
      }

      int64_t batch_byte_size =
          element_count *
          triton::common::GetDataTypeByteSize(input_meta.second.data_type());
      if (batch_byte_size == 0) {
        batch_byte_size = element_count * sizeof(int32_t);
      }

      switch (input_meta.second.input_data_type_case()) {
        case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData:
          max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size);
          break;
        case inference::ModelWarmup_Input::InputDataTypeCase::kRandomData: {
          // Because Triton expects STRING type to be in special format
          // (prepend 4 bytes to specify string length), so using zero data
          // for simplicity (4 bytes * element count of zeros).
          if (input_meta.second.data_type() ==
              inference::DataType::TYPE_STRING) {
            max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size);
          } else {
            max_random_byte_size =
                std::max(batch_byte_size, max_random_byte_size);
          }
          break;
        }
        default:
          break;
      }
    }

    warmup_samples_.emplace_back(
        warmup_setting.name(), warmup_setting.count());
    auto& warmup_data = warmup_samples_.back();
    // Create buffers for synthetic data
    TRITONSERVER_MemoryType type;
    int64_t type_id;
    warmup_data.zero_data_.reset(new AllocatedMemory(
        max_zero_byte_size, TRITONSERVER_MEMORY_CPU_PINNED /* memory_type */,
        0 /* memory_type_id */));
    char* zero_buffer = warmup_data.zero_data_->MutableBuffer(&type, &type_id);
    memset(zero_buffer, 0, max_zero_byte_size);

    warmup_data.random_data_.reset(new AllocatedMemory(
        max_random_byte_size, TRITONSERVER_MEMORY_CPU_PINNED /* memory_type */,
        0 /* memory_type_id */));
    char* random_buffer =
        warmup_data.random_data_->MutableBuffer(&type, &type_id);
    for (int64_t offset = 0; offset < max_random_byte_size; offset++) {
      random_buffer[offset] = rand();
    }

    // Prepare the inference request for the specified sample. We are not
    // using the in-process C API because the request doesn't go through the
    // same pipeline (i.e. no normalization / scheduler), so we need to
    // prepare the request to the state just before calling the instance
    // execute function.
    for (size_t cnt = 0; cnt < warmup_setting.batch_size(); cnt++) {
      warmup_data.requests_.emplace_back(
          new InferenceRequest(model_, model_->Version()));
      auto& lrequest = warmup_data.requests_.back();

      // Second pass to prepare original inputs.
      std::vector<std::shared_ptr<InferenceRequest::Input>> input_sps;
      for (const auto& input_meta : warmup_setting.inputs()) {
        auto batch1_element_count =
            triton::common::GetElementCount(input_meta.second.dims());
        auto batch_byte_size =
            batch1_element_count *
            triton::common::GetDataTypeByteSize(input_meta.second.data_type());
        if (batch_byte_size == 0) {
          batch_byte_size = batch1_element_count * sizeof(int32_t);
        }

        const char* allocated_ptr;
        switch (input_meta.second.input_data_type_case()) {
          case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData:
            allocated_ptr = zero_buffer;
            break;
          case inference::ModelWarmup_Input::InputDataTypeCase::kRandomData: {
            if (input_meta.second.data_type() ==
                inference::DataType::TYPE_STRING) {
              allocated_ptr = zero_buffer;
            } else {
              allocated_ptr = random_buffer;
            }
            break;
          }
          case inference::ModelWarmup_Input::InputDataTypeCase::
              kInputDataFile: {
            // For data provided from file, we can set buffer in first pass
            warmup_data.provided_data_.emplace_back(new std::string());
            auto input_data = warmup_data.provided_data_.back().get();
            RETURN_IF_ERROR(ReadTextFile(
                JoinPath({model_->LocalizedModelPath(), kWarmupDataFolder,
                          input_meta.second.input_data_file()}),
                input_data));
            if (input_meta.second.data_type() ==
                inference::DataType::TYPE_STRING) {
              batch_byte_size = input_data->size();
            } else if (((size_t)batch_byte_size) > input_data->size()) {
              return Status(
                  Status::Code::INVALID_ARG,
                  lrequest->LogRequest() + "warmup setting expects " +
                      std::to_string(batch_byte_size) +
                      " bytes, but the data provided from " +
                      input_meta.second.input_data_file() + " only has " +
                      std::to_string(input_data->size()) + " bytes");
            }
            allocated_ptr = input_data->data();
            break;
          }
          default:
            return Status(
                Status::Code::INVALID_ARG,
                lrequest->LogRequest() + "warmup setting expects input '" +
                    input_meta.first + "' to have input_data_type set");
        }

        const inference::ModelInput* input_config;
        bool is_original_input =
            model_->GetInput(input_meta.first, &input_config).IsOk();
        InferenceRequest::Input* input = nullptr;
        std::vector<int64_t> input_meta_shape;
        // Append batch size only if the model supports batching
        // and this is not a control input.
        if ((model_->Config().max_batch_size() != 0) && is_original_input) {
          input_meta_shape.push_back(1);
        }
        for (auto d : input_meta.second.dims()) {
          input_meta_shape.push_back(d);
        }
        if (is_original_input) {
          RETURN_IF_ERROR(lrequest->AddOriginalInput(
              input_meta.first, input_meta.second.data_type(),
              input_meta_shape, &input));
        } else {
          input_sps.emplace_back();
          RETURN_IF_ERROR(lrequest->AddOverrideInput(
              input_meta.first, input_meta.second.data_type(),
              (model_->Config().max_batch_size() != 0 ? 1 : 0),
              input_meta_shape, &input_sps.back()));
          input = input_sps.back().get();
        }
        RETURN_IF_ERROR(input->AppendData(
            allocated_ptr, batch_byte_size,
            TRITONSERVER_MEMORY_CPU /* memory_type */,
            0 /* memory_type_id */));
      }

      RETURN_IF_ERROR(lrequest->PrepareForInference());
      // Override inputs must be added after PrepareForInference() is called
      for (const auto& sp : input_sps) {
        RETURN_IF_ERROR(lrequest->AddOverrideInput(sp));
      }
    }
  }

  return Status::Success;
}
void
TritonModelInstance::Schedule(
    std::vector<std::unique_ptr<InferenceRequest>>&& requests,
    const std::function<void()>& OnCompletion)
{
  // Use a thread local vector to avoid needing to malloc each
  // time an inference is run.
  thread_local std::vector<TRITONBACKEND_Request*> triton_requests(1024);
  triton_requests.clear();
  for (auto& r : requests) {
    // Load the input states for the inference request.
    r->LoadInputStates();
    triton_requests.push_back(
        reinterpret_cast<TRITONBACKEND_Request*>(r.release()));
  }

  Execute(triton_requests);
  OnCompletion();
}

Status
TritonModelInstance::Initialize()
{
  RETURN_IF_ERROR(SetNumaConfigOnThread(HostPolicy()));
  return Status::Success;
}
Status
TritonModelInstance::WarmUp()
{
  // move samples to local variable for scoped cleanup
  std::vector<triton::core::TritonModelInstance::WarmupData> lwarmup_samples;
  lwarmup_samples.swap(warmup_samples_);

  for (auto& sample : lwarmup_samples) {
    for (size_t iteration = 1; iteration <= sample.count_; ++iteration) {
      LOG_VERBOSE(1) << "model '" << sample.requests_.back()->ModelName()
                     << "' instance " << Name()
                     << " is running warmup sample '" << sample.sample_name_
                     << "' for iteration " << iteration;

      // request/response complete is asynchronous so use promise to wait for
      // completion. Also collect error messages from the responses in a
      // vector.
      std::vector<std::promise<void>> request_complete(
          sample.requests_.size());
      std::vector<std::string> response_errors;
      std::vector<std::pair<std::promise<void>, std::vector<std::string>*>>
          response_complete(sample.requests_.size());

      std::vector<TRITONBACKEND_Request*> triton_requests;
      for (size_t i = 0; i < sample.requests_.size(); ++i) {
        auto& request = sample.requests_[i];
        request->SetReleaseCallback(
            WarmupRequestComplete, &request_complete[i]);
        response_complete[i].second = &response_errors;
        request->SetResponseCallback(
            &warmup_allocator, nullptr, WarmupResponseComplete,
            &response_complete[i]);
        // Capture timestamp before run to avoid incorrect accumulation from
        // sequential warmup runs
#ifdef TRITON_ENABLE_STATS
        request->CaptureRequestStartNs();
#endif  // TRITON_ENABLE_STATS
        request->CaptureQueueStartNs();
        triton_requests.push_back(
            reinterpret_cast<TRITONBACKEND_Request*>(request.get()));
      }

      Execute(triton_requests);

      // Wait for warmup sample to complete and check error
      for (size_t i = 0; i < sample.requests_.size(); ++i) {
        request_complete[i].get_future().get();
        response_complete[i].first.get_future().get();
      }
      if (response_errors.size() != 0) {
        std::string err_str =
            "failed to run warmup sample '" + sample.sample_name_ + "': ";
        for (const auto& error : response_errors) {
          err_str += (error + "; ");
        }
        // End warmup as soon as there is a failing sample
        LOG_VERBOSE(1) << "model '" << sample.requests_.back()->ModelName()
                       << "' instance " << Name()
                       << " failed to run warmup sample '"
                       << sample.sample_name_ << "'";
        return Status(Status::Code::INVALID_ARG, err_str);
      }
    }
  }

  return Status::Success;
}
void
TritonModelInstance::Execute(
    std::vector<TRITONBACKEND_Request*>& triton_requests)
{
  TRITONBACKEND_ModelInstance* triton_model_instance =
      reinterpret_cast<TRITONBACKEND_ModelInstance*>(this);
  TritonBackend::TritonModelInstanceExecFn_t inst_exec_fn =
      model_->Backend()->ModelInstanceExecFn();

  // If there is an error then we retain ownership of 'requests'
  // and must send error responses.
  TRITONSERVER_Error* err = inst_exec_fn(
      triton_model_instance, &triton_requests[0], triton_requests.size());
  if (err != nullptr) {
    Status status = Status(
        TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)),
        TRITONSERVER_ErrorMessage(err));
    for (TRITONBACKEND_Request* tr : triton_requests) {
      std::unique_ptr<InferenceRequest> ur(
          reinterpret_cast<InferenceRequest*>(tr));
      InferenceRequest::RespondIfError(
          ur, status, true /* release_requests */);
    }

    TRITONSERVER_ErrorDelete(err);
  }
}
Status
TritonModelInstance::TritonBackendThread::CreateBackendThread(
    const std::string name, TritonModelInstance* model_instance,
    const int nice, const int32_t device_id,
    std::unique_ptr<TritonBackendThread>* triton_backend_thread)
{
  TritonBackendThread* raw_triton_backend_thread =
      new TritonBackendThread(name, model_instance->Model());
  std::unique_ptr<TritonBackendThread> runner(raw_triton_backend_thread);
  runner->AddModelInstance(model_instance);

  runner->backend_thread_ =
      std::thread([raw_triton_backend_thread, nice, device_id]() {
        raw_triton_backend_thread->BackendThread(nice, device_id);
      });

  triton_backend_thread->reset(runner.release());

  return Status::Success;
}

void
TritonModelInstance::TritonBackendThread::AddModelInstance(
    TritonModelInstance* model_instance)
{
  model_instances_.push_back(model_instance);
}

Status
TritonModelInstance::TritonBackendThread::InitAndWarmUpModelInstance(
    TritonModelInstance* model_instance)
{
  // Initialize the instance on the backend thread
  auto init_payload = model_->Server()->GetRateLimiter()->GetPayload(
      Payload::Operation::INIT, model_instance);
  RETURN_IF_ERROR(model_->Server()->GetRateLimiter()->EnqueuePayload(
      model_, init_payload));
  RETURN_IF_ERROR(init_payload->Wait());

  // Warm-up the instance on the backend thread
  auto warmup_payload = model_->Server()->GetRateLimiter()->GetPayload(
      Payload::Operation::WARM_UP, model_instance);
  RETURN_IF_ERROR(model_->Server()->GetRateLimiter()->EnqueuePayload(
      model_, warmup_payload));
  RETURN_IF_ERROR(warmup_payload->Wait());

  return Status::Success;
}

TritonModelInstance::TritonBackendThread::TritonBackendThread(
    const std::string& name, TritonModel* model)
    : name_(name), model_(model)
{
}

TritonModelInstance::TritonBackendThread::~TritonBackendThread()
{
  StopBackendThread();
}

void
TritonModelInstance::TritonBackendThread::StopBackendThread()
{
  if (backend_thread_.joinable()) {
    // Signal the backend thread to exit and then wait for it...
    auto exit_payload = model_->Server()->GetRateLimiter()->GetPayload(
        Payload::Operation::EXIT, model_instances_.back());
    model_->Server()->GetRateLimiter()->EnqueuePayload(model_, exit_payload);
    backend_thread_.join();
  }
}

void
TritonModelInstance::TritonBackendThread::BackendThread(
    const int nice, const int32_t device_id)
{
#ifndef _WIN32
  if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) {
    LOG_VERBOSE(1) << "Starting backend thread for " << name_ << " at nice "
                   << nice << " on device " << device_id << "...";
  } else {
    LOG_VERBOSE(1) << "Starting backend thread for " << name_
                   << " at default nice (requested nice " << nice
                   << " failed)" << " on device " << device_id << "...";
  }
#else
  LOG_VERBOSE(1) << "Starting backend thread for " << name_
                 << " at default nice on device " << device_id << "...";
#endif

  bool should_exit = false;
  while (!should_exit) {
    std::shared_ptr<Payload> payload;
    model_->Server()->GetRateLimiter()->DequeuePayload(
        model_instances_, &payload);
    NVTX_RANGE(nvtx_, "BackendThread " + name_);
    payload->Execute(&should_exit);
    model_instances_.push_back(payload->GetInstance());
    // Release the payload to the RateLimiter
    model_->Server()->GetRateLimiter()->PayloadRelease(payload);
  }
  LOG_VERBOSE(1) << "Stopping backend thread for " << name_ << "...";
}
extern "C" {

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceName(
    TRITONBACKEND_ModelInstance* instance, const char** name)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *name = ti->Name().c_str();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceKind(
    TRITONBACKEND_ModelInstance* instance,
    TRITONSERVER_InstanceGroupKind* kind)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *kind = ti->Kind();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceDeviceId(
    TRITONBACKEND_ModelInstance* instance, int32_t* device_id)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *device_id = ti->DeviceId();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceHostPolicy(
    TRITONBACKEND_ModelInstance* instance, TRITONSERVER_Message** host_policy)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *host_policy = const_cast<TRITONSERVER_Message*>(
      reinterpret_cast<const TRITONSERVER_Message*>(&ti->HostPolicyMessage()));
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *count = ti->Profiles().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceProfileName(
    TRITONBACKEND_ModelInstance* instance, const uint32_t index,
    const char** profile_name)
{
  *profile_name = nullptr;

  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  const auto& rprofiles = ti->Profiles();
  if (index >= rprofiles.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("out of bounds index ") + std::to_string(index) +
         ": instance is configured with " + std::to_string(rprofiles.size()) +
         " profiles")
            .c_str());
  }

  *profile_name = rprofiles[index].c_str();

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceCount(
    TRITONBACKEND_ModelInstance* instance, uint32_t* count)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *count = ti->SecondaryDevices().size();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
    TRITONBACKEND_ModelInstance* instance, uint32_t index, const char** kind,
    int64_t* id)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  const auto& rsecondarydevices = ti->SecondaryDevices();

  if (index >= rsecondarydevices.size()) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("out of bounds index ") + std::to_string(index) +
         ": instance is configured with " +
         std::to_string(rsecondarydevices.size()) + " secondary devices")
            .c_str());
  }

  *kind = rsecondarydevices[index].kind_.c_str();
  *id = rsecondarydevices[index].id_;

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceIsPassive(
    TRITONBACKEND_ModelInstance* instance, bool* is_passive)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *is_passive = ti->IsPassive();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceModel(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Model** model)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *model = reinterpret_cast<TRITONBACKEND_Model*>(ti->Model());
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceState(
    TRITONBACKEND_ModelInstance* instance, void** state)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  *state = ti->State();
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceSetState(
    TRITONBACKEND_ModelInstance* instance, void* state)
{
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  ti->SetState(state);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportStatistics(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request* request,
    const bool success, const uint64_t exec_start_ns,
    const uint64_t compute_start_ns, const uint64_t compute_end_ns,
    const uint64_t exec_end_ns)
{
#ifdef TRITON_ENABLE_STATS
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
  tr->ReportStatistics(
      ti->MetricReporter(), success, exec_start_ns, compute_start_ns,
      compute_end_ns, exec_end_ns);
#endif  // TRITON_ENABLE_STATS

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceReportBatchStatistics(
    TRITONBACKEND_ModelInstance* instance, const uint64_t batch_size,
    const uint64_t exec_start_ns, const uint64_t compute_start_ns,
    const uint64_t compute_end_ns, const uint64_t exec_end_ns)
{
#ifdef TRITON_ENABLE_STATS
  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
  ti->Model()->MutableStatsAggregator()->UpdateInferBatchStats(
      ti->MetricReporter(), batch_size, exec_start_ns, compute_start_ns,
      compute_end_ns, exec_end_ns);
#endif  // TRITON_ENABLE_STATS

  return nullptr;  // success
}

}  // extern C

}}  // namespace triton::core
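The warmup path above relies on a simple completion pattern: WarmupRequestComplete and WarmupResponseComplete receive a void* userp that points at a std::promise<void> (or a pair containing one), and WarmUp() blocks on the matching futures. A self-contained sketch of that same idea, independent of the Triton types above and purely illustrative:

#include <future>
#include <thread>

// userp points at a std::promise<void> owned by the caller.
void OnComplete(void* userp)
{
  reinterpret_cast<std::promise<void>*>(userp)->set_value();
}

int main()
{
  std::promise<void> done;
  // Simulate an asynchronous completion callback firing on another thread.
  std::thread worker([&done]() { OnComplete(&done); });
  done.get_future().get();  // block until the callback has run
  worker.join();
  return 0;
}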
3rdparty/core-r22.12/src/backend_model_instance.h deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <functional>
#include <future>
#include <memory>
#include <string>
#include <thread>
#include "constants.h"
#include "memory.h"
#include "metric_model_reporter.h"
#include "model_config.pb.h"
#include "server_message.h"
#include "status.h"
#include "triton/common/sync_queue.h"
namespace triton { namespace core {

class TritonModel;
class InferenceRequest;

//
// Represents a model instance.
//
class TritonModelInstance {
 public:
  static Status CreateInstances(
      TritonModel* model,
      const triton::common::BackendCmdlineConfigMap&
          backend_cmdline_config_map,
      const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
      const inference::ModelConfig& model_config, const bool device_blocking);
  ~TritonModelInstance();

  const std::string& Name() const { return name_; }
  size_t Index() const { return index_; }
  TRITONSERVER_InstanceGroupKind Kind() const { return kind_; }
  int32_t DeviceId() const { return device_id_; }
  const triton::common::HostPolicyCmdlineConfig& HostPolicy() const
  {
    return host_policy_;
  }
  const TritonServerMessage& HostPolicyMessage() const
  {
    return host_policy_message_;
  }
  bool IsPassive() const { return passive_; }
  const std::vector<std::string>& Profiles() const { return profile_names_; }

  struct SecondaryDevice {
    SecondaryDevice(const std::string kind, const int64_t id)
        : kind_(kind), id_(id)
    {
    }
    const std::string kind_;
    const int64_t id_;
  };
  const std::vector<SecondaryDevice>& SecondaryDevices() const
  {
    return secondary_devices_;
  }

  Status Initialize();
  Status WarmUp();
  void Schedule(
      std::vector<std::unique_ptr<InferenceRequest>>&& requests,
      const std::function<void()>& OnCompletion);

  TritonModel* Model() const { return model_; }

  void* State() { return state_; }
  void SetState(void* state) { state_ = state; }

  MetricModelReporter* MetricReporter() const { return reporter_.get(); }

 private:
  DISALLOW_COPY_AND_ASSIGN(TritonModelInstance);

  class TritonBackendThread;

  TritonModelInstance(
      TritonModel* model, const std::string& name, const size_t index,
      const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
      const std::vector<std::string>& profile_names, const bool passive,
      const triton::common::HostPolicyCmdlineConfig& host_policy,
      const TritonServerMessage& host_policy_message,
      const std::vector<SecondaryDevice>& secondary_devices);

  static Status CreateInstance(
      TritonModel* model, const std::string& name, const size_t index,
      const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
      const std::vector<std::string>& profile_names, const bool passive,
      const std::string& host_policy_name,
      const triton::common::HostPolicyCmdlineConfig& host_policy,
      const inference::ModelRateLimiter& rate_limiter_config,
      const bool device_blocking,
      std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
          device_to_thread_map,
      const std::vector<SecondaryDevice>& secondary_devices);

  Status SetBackendThread(
      const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id,
      const bool device_blocking,
      std::map<uint32_t, std::shared_ptr<TritonBackendThread>>*
          device_to_thread_map);
  Status GenerateWarmupData();

  void Execute(std::vector<TRITONBACKEND_Request*>& triton_requests);

  class TritonBackendThread {
   public:
    static Status CreateBackendThread(
        const std::string name, TritonModelInstance* model, const int nice,
        const int32_t device_id,
        std::unique_ptr<TritonBackendThread>* triton_backend_thread);
    void AddModelInstance(TritonModelInstance* model_instance);
    Status InitAndWarmUpModelInstance(TritonModelInstance* model_instance);
    void StopBackendThread();
    ~TritonBackendThread();

   private:
    TritonBackendThread(const std::string& name, TritonModel* model);
    void BackendThread(const int nice, const int32_t device_id);

    std::string name_;

    TritonModel* model_;
    std::deque<TritonModelInstance*> model_instances_;

    std::thread backend_thread_;
    std::atomic<bool> backend_thread_exit_;
  };
  std::shared_ptr<TritonBackendThread> triton_backend_thread_;

  struct WarmupData {
    WarmupData(const std::string& sample_name, const size_t count)
        : sample_name_(sample_name), count_(std::max(count, size_t{1}))
    {
    }

    std::string sample_name_;
    size_t count_;
    // Using a batch of requests to satisfy batch size, this provides better
    // alignment on the batch expected by the model, especially for sequence
    // model.
    std::vector<std::unique_ptr<InferenceRequest>> requests_;

    // Placeholder for input data
    std::unique_ptr<AllocatedMemory> zero_data_;
    std::unique_ptr<AllocatedMemory> random_data_;
    std::vector<std::unique_ptr<std::string>> provided_data_;
  };
  std::vector<WarmupData> warmup_samples_;

  // The TritonModel object that owns this instance. The instance
  // holds this as a raw pointer because the lifetime of the model is
  // guaranteed to be longer than the lifetime of an instance owned by the
  // model.
  TritonModel* model_;

  std::string name_;
  size_t index_;

  // For CPU device_id_ is always 0. For GPU device_id_ indicates the
  // GPU device to be used by the instance.
  TRITONSERVER_InstanceGroupKind kind_;
  int32_t device_id_;
  const triton::common::HostPolicyCmdlineConfig host_policy_;
  TritonServerMessage host_policy_message_;
  std::vector<std::string> profile_names_;
  bool passive_;
  std::vector<SecondaryDevice> secondary_devices_;

  // Reporter for metrics, or nullptr if no metrics should be reported
  std::shared_ptr<MetricModelReporter> reporter_;

  // Opaque state associated with this model instance.
  void* state_;
};

}}  // namespace triton::core
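A backend typically pairs the State()/SetState() accessors declared here with the TRITONBACKEND_ModelInstanceSetState / TRITONBACKEND_ModelInstanceState entry points defined in backend_model_instance.cc above. A hedged sketch of that backend-side usage; ModelInstanceState is a hypothetical backend-defined type, not part of the files shown here:

// Hypothetical backend-side per-instance state.
struct ModelInstanceState {
  int some_handle = 0;
};

TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  auto* state = new ModelInstanceState();
  // Attach backend-owned state to the instance; Triton stores the raw pointer.
  TRITONSERVER_Error* err = TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(state));
  if (err != nullptr) {
    delete state;
    return err;
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* vstate;
  TRITONSERVER_Error* err = TRITONBACKEND_ModelInstanceState(instance, &vstate);
  if (err != nullptr) {
    return err;
  }
  delete reinterpret_cast<ModelInstanceState*>(vstate);
  return nullptr;  // success
}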
3rdparty/core-r22.12/src/buffer_attributes.cc deleted 100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "buffer_attributes.h"
#include <cstring>
#include "constants.h"
namespace triton { namespace core {

void
BufferAttributes::SetByteSize(const size_t& byte_size)
{
  byte_size_ = byte_size;
}

void
BufferAttributes::SetMemoryType(const TRITONSERVER_MemoryType& memory_type)
{
  memory_type_ = memory_type;
}

void
BufferAttributes::SetMemoryTypeId(const int64_t& memory_type_id)
{
  memory_type_id_ = memory_type_id;
}

void
BufferAttributes::SetCudaIpcHandle(void* cuda_ipc_handle)
{
  char* lcuda_ipc_handle = reinterpret_cast<char*>(cuda_ipc_handle);
  cuda_ipc_handle_.clear();
  std::copy(
      lcuda_ipc_handle, lcuda_ipc_handle + CUDA_IPC_STRUCT_SIZE,
      std::back_inserter(cuda_ipc_handle_));
}

void*
BufferAttributes::CudaIpcHandle()
{
  if (cuda_ipc_handle_.empty()) {
    return nullptr;
  } else {
    return reinterpret_cast<void*>(cuda_ipc_handle_.data());
  }
}

size_t
BufferAttributes::ByteSize() const
{
  return byte_size_;
}

TRITONSERVER_MemoryType
BufferAttributes::MemoryType() const
{
  return memory_type_;
}

int64_t
BufferAttributes::MemoryTypeId() const
{
  return memory_type_id_;
}

BufferAttributes::BufferAttributes(
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, char* cuda_ipc_handle)
    : byte_size_(byte_size), memory_type_(memory_type),
      memory_type_id_(memory_type_id)
{
  // cuda ipc handle size
  cuda_ipc_handle_.reserve(CUDA_IPC_STRUCT_SIZE);

  if (cuda_ipc_handle != nullptr) {
    std::copy(
        cuda_ipc_handle, cuda_ipc_handle + CUDA_IPC_STRUCT_SIZE,
        std::back_inserter(cuda_ipc_handle_));
  }
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/buffer_attributes.h deleted 100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <iterator>
#include <vector>
#include "tritonserver_apis.h"
#pragma once
namespace triton { namespace core {

//
// A class to hold information about the buffer allocation.
//
class BufferAttributes {
 public:
  BufferAttributes(
      size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id, char cuda_ipc_handle[64]);
  BufferAttributes()
  {
    memory_type_ = TRITONSERVER_MEMORY_CPU;
    memory_type_id_ = 0;
    cuda_ipc_handle_.reserve(64);
  }

  // Set the buffer byte size
  void SetByteSize(const size_t& byte_size);

  // Set the buffer memory_type
  void SetMemoryType(const TRITONSERVER_MemoryType& memory_type);

  // Set the buffer memory type id
  void SetMemoryTypeId(const int64_t& memory_type_id);

  // Set the cuda ipc handle
  void SetCudaIpcHandle(void* cuda_ipc_handle);

  // Get the cuda ipc handle
  void* CudaIpcHandle();

  // Get the byte size
  size_t ByteSize() const;

  // Get the memory type
  TRITONSERVER_MemoryType MemoryType() const;

  // Get the memory type id
  int64_t MemoryTypeId() const;

 private:
  size_t byte_size_;
  TRITONSERVER_MemoryType memory_type_;
  int64_t memory_type_id_;
  std::vector<char> cuda_ipc_handle_;
};

}}  // namespace triton::core
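As a usage note, the setters and getters above are plain value accessors, so describing a buffer only takes a few calls. A small illustrative fragment (the 1024-byte size is an arbitrary example, not taken from the sources):

triton::core::BufferAttributes attrs;            // defaults to CPU, id 0
attrs.SetByteSize(1024);                         // arbitrary example size
attrs.SetMemoryType(TRITONSERVER_MEMORY_CPU_PINNED);
attrs.SetMemoryTypeId(0);
// No CUDA IPC handle was set, so CudaIpcHandle() returns nullptr.
bool has_ipc = (attrs.CudaIpcHandle() != nullptr);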
3rdparty/core-r22.12/src/constants.h deleted 100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stdint.h>
namespace triton { namespace core {

constexpr char kInferHeaderContentLengthHTTPHeader[] =
    "Inference-Header-Content-Length";
constexpr char kAcceptEncodingHTTPHeader[] = "Accept-Encoding";
constexpr char kContentEncodingHTTPHeader[] = "Content-Encoding";
constexpr char kContentTypeHeader[] = "Content-Type";
constexpr char kContentLengthHeader[] = "Content-Length";

constexpr char kTensorFlowGraphDefPlatform[] = "tensorflow_graphdef";
constexpr char kTensorFlowSavedModelPlatform[] = "tensorflow_savedmodel";
constexpr char kTensorFlowGraphDefFilename[] = "model.graphdef";
constexpr char kTensorFlowSavedModelFilename[] = "model.savedmodel";
constexpr char kTensorFlowBackend[] = "tensorflow";

constexpr char kTensorRTPlanPlatform[] = "tensorrt_plan";
constexpr char kTensorRTPlanFilename[] = "model.plan";
constexpr char kTensorRTBackend[] = "tensorrt";

constexpr char kOnnxRuntimeOnnxPlatform[] = "onnxruntime_onnx";
constexpr char kOnnxRuntimeOnnxFilename[] = "model.onnx";
constexpr char kOnnxRuntimeBackend[] = "onnxruntime";

constexpr char kOpenVINORuntimeOpenVINOFilename[] = "model.xml";
constexpr char kOpenVINORuntimeBackend[] = "openvino";

constexpr char kPyTorchLibTorchPlatform[] = "pytorch_libtorch";
constexpr char kPyTorchLibTorchFilename[] = "model.pt";
constexpr char kPyTorchBackend[] = "pytorch";

constexpr char kPythonFilename[] = "model.py";
constexpr char kPythonBackend[] = "python";

#ifdef TRITON_ENABLE_ENSEMBLE
constexpr char kEnsemblePlatform[] = "ensemble";
#endif  // TRITON_ENABLE_ENSEMBLE

constexpr char kTensorRTExecutionAccelerator[] = "tensorrt";
constexpr char kOpenVINOExecutionAccelerator[] = "openvino";
constexpr char kGPUIOExecutionAccelerator[] = "gpu_io";
constexpr char kAutoMixedPrecisionExecutionAccelerator[] =
    "auto_mixed_precision";

constexpr char kModelConfigPbTxt[] = "config.pbtxt";

constexpr char kMetricsLabelModelName[] = "model";
constexpr char kMetricsLabelModelVersion[] = "version";
constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";

constexpr char kWarmupDataFolder[] = "warmup";
constexpr char kInitialStateFolder[] = "initial_state";

constexpr uint64_t NANOS_PER_SECOND = 1000000000;
constexpr uint64_t NANOS_PER_MILLIS = 1000000;
constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX;
constexpr uint64_t SEQUENCE_IDLE_DEFAULT_MICROSECONDS = 1000 * 1000;
constexpr size_t STRING_CORRELATION_ID_MAX_LENGTH_BYTES = 128;
constexpr size_t CUDA_IPC_STRUCT_SIZE = 64;

#ifdef TRITON_ENABLE_METRICS
// MetricModelReporter expects a device ID for GPUs, but we reuse this device
// ID for other metrics as well such as for CPU and Response Cache metrics
constexpr int METRIC_REPORTER_ID_CPU = -1;
constexpr int METRIC_REPORTER_ID_RESPONSE_CACHE = -2;
#endif

#define TIMESPEC_TO_NANOS(TS) \
  ((TS).tv_sec * triton::core::NANOS_PER_SECOND + (TS).tv_nsec)
#define TIMESPEC_TO_MILLIS(TS) \
  (TIMESPEC_TO_NANOS(TS) / triton::core::NANOS_PER_MILLIS)

#define DISALLOW_MOVE(TypeName) TypeName(Context&& o) = delete;
#define DISALLOW_COPY(TypeName) TypeName(const TypeName&) = delete;
#define DISALLOW_ASSIGN(TypeName) void operator=(const TypeName&) = delete;
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  DISALLOW_COPY(TypeName)                  \
  DISALLOW_ASSIGN(TypeName)

}}  // namespace triton::core
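For reference, the DISALLOW_* and TIMESPEC_* macros above are what the other deleted sources use (for example DISALLOW_COPY_AND_ASSIGN(TritonModelInstance) in backend_model_instance.h). A minimal illustration of both, with a hypothetical Widget class introduced only for the example:

#include <ctime>
#include <cstdint>

class Widget {  // hypothetical class, for illustration only
 public:
  Widget() = default;

 private:
  DISALLOW_COPY_AND_ASSIGN(Widget);  // deletes the copy ctor and operator=
};

uint64_t
NowNanos()
{
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return TIMESPEC_TO_NANOS(ts);  // tv_sec * NANOS_PER_SECOND + tv_nsec
}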
3rdparty/core-r22.12/src/cuda_memory_manager.cc deleted 100644 → 0
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "cuda_memory_manager.h"
#include <cnmem.h>
#include <string.h>
#include <set>
#include "cuda_utils.h"
#include "triton/common/logging.h"
namespace {

#define RETURN_IF_CNMEM_ERROR(S, MSG)                    \
  do {                                                   \
    auto status__ = (S);                                 \
    if (status__ != CNMEM_STATUS_SUCCESS) {              \
      return Status(                                     \
          Status::Code::INTERNAL,                        \
          (MSG) + ": " + cnmemGetErrorString(status__)); \
    }                                                    \
  } while (false)

std::string
PointerToString(void* ptr)
{
  std::stringstream ss;
  ss << ptr;
  return ss.str();
}

}  // namespace
namespace triton { namespace core {

std::unique_ptr<CudaMemoryManager> CudaMemoryManager::instance_;
std::mutex CudaMemoryManager::instance_mu_;

CudaMemoryManager::~CudaMemoryManager()
{
  if (has_allocation_) {
    auto status = cnmemFinalize();
    if (status != CNMEM_STATUS_SUCCESS) {
      LOG_ERROR << "Failed to finalize CUDA memory manager: [" << status
                << "] " << cnmemGetErrorString(status);
    }
  }
}

void
CudaMemoryManager::Reset()
{
  std::lock_guard<std::mutex> lock(instance_mu_);
  instance_.reset();
}

Status
CudaMemoryManager::Create(const CudaMemoryManager::Options& options)
{
  // Ensure thread-safe creation of the CUDA memory pool
  std::lock_guard<std::mutex> lock(instance_mu_);
  if (instance_ != nullptr) {
    LOG_WARNING << "New CUDA memory pools could not be created since they "
                   "already exist";
    return Status::Success;
  }

  std::set<int> supported_gpus;
  auto status = GetSupportedGPUs(
      &supported_gpus, options.min_supported_compute_capability_);
  if (status.IsOk()) {
    std::vector<cnmemDevice_t> devices;
    for (auto gpu : supported_gpus) {
      const auto it = options.memory_pool_byte_size_.find(gpu);
      if ((it != options.memory_pool_byte_size_.end()) && (it->second != 0)) {
        devices.emplace_back();
        auto& device = devices.back();
        memset(&device, 0, sizeof(device));
        device.device = gpu;
        device.size = it->second;
        LOG_INFO << "CUDA memory pool is created on device " << device.device
                 << " with size " << device.size;
      }
    }

    if (!devices.empty()) {
      RETURN_IF_CNMEM_ERROR(
          cnmemInit(devices.size(), devices.data(), CNMEM_FLAGS_CANNOT_GROW),
          std::string("Failed to initialize CUDA memory manager"));
    } else {
      LOG_INFO << "CUDA memory pool disabled";
    }

    // Keep an instance so that CNMeM is finalized properly when it goes
    // out of scope.
    instance_.reset(new CudaMemoryManager(!devices.empty()));
  } else {
    return Status(
        status.ErrorCode(),
        "Failed to initialize CUDA memory manager: " + status.Message());
  }

  return Status::Success;
}

Status
CudaMemoryManager::Alloc(void** ptr, uint64_t size, int64_t device_id)
{
  if (instance_ == nullptr) {
    return Status(
        Status::Code::UNAVAILABLE, "CudaMemoryManager has not been created");
  } else if (!instance_->has_allocation_) {
    return Status(
        Status::Code::UNAVAILABLE,
        "CudaMemoryManager has no preallocated CUDA memory");
  }

  int current_device;
  RETURN_IF_CUDA_ERR(
      cudaGetDevice(&current_device), std::string("Failed to get device"));
  bool overridden = (current_device != device_id);
  if (overridden) {
    RETURN_IF_CUDA_ERR(
        cudaSetDevice(device_id), std::string("Failed to set device"));
  }

  // Defer returning the error to make sure the device is recovered
  auto err = cnmemMalloc(ptr, size, nullptr);
  if (overridden) {
    cudaSetDevice(current_device);
  }

  RETURN_IF_CNMEM_ERROR(
      err, std::string("Failed to allocate CUDA memory with byte size ") +
               std::to_string(size) + " on GPU " + std::to_string(device_id));
  return Status::Success;
}

Status
CudaMemoryManager::Free(void* ptr, int64_t device_id)
{
  if (instance_ == nullptr) {
    return Status(
        Status::Code::UNAVAILABLE, "CudaMemoryManager has not been created");
  } else if (!instance_->has_allocation_) {
    return Status(
        Status::Code::UNAVAILABLE,
        "CudaMemoryManager has no preallocated CUDA memory");
  }

  int current_device;
  RETURN_IF_CUDA_ERR(
      cudaGetDevice(&current_device), std::string("Failed to get device"));
  bool overridden = (current_device != device_id);
  if (overridden) {
    RETURN_IF_CUDA_ERR(
        cudaSetDevice(device_id), std::string("Failed to set device"));
  }

  // Defer returning the error to make sure the device is recovered
  auto err = cnmemFree(ptr, nullptr);
  if (overridden) {
    cudaSetDevice(current_device);
  }

  RETURN_IF_CNMEM_ERROR(
      err, std::string("Failed to deallocate CUDA memory at address ") +
               PointerToString(ptr) + " on GPU " + std::to_string(device_id));
  return Status::Success;
}

}}  // namespace triton::core
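Alloc() and Free() above each hand-roll the same save/switch/restore device sequence around the cnmem call. A minimal RAII sketch of that pattern, for illustration only (ScopedSetDevice is a hypothetical helper, not part of this file or of Triton):

// Hypothetical illustration: an RAII guard capturing the save/switch/restore
// device pattern used by Alloc() and Free() above.
#include <cuda_runtime_api.h>

class ScopedSetDevice {
 public:
  explicit ScopedSetDevice(int device_id)
  {
    cudaGetDevice(&previous_);
    overridden_ = (previous_ != device_id);
    if (overridden_) {
      cudaSetDevice(device_id);
    }
  }
  ~ScopedSetDevice()
  {
    // Restore the caller's device even on early return.
    if (overridden_) {
      cudaSetDevice(previous_);
    }
  }

 private:
  int previous_ = 0;
  bool overridden_ = false;
};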
3rdparty/core-r22.12/src/cuda_memory_manager.h
deleted
100644 → 0
View file @
d592fbea
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#include <map>
#include <memory>
#include <mutex>
#include "status.h"
namespace triton { namespace core {

// This is a singleton class responsible for maintaining the CUDA memory pool
// used by the inference server. CUDA memory allocations and deallocations
// must be requested via functions provided by this class.
class CudaMemoryManager {
 public:
  // Options to configure the CUDA memory manager.
  struct Options {
    Options(double cc = 6.0, const std::map<int, uint64_t>& s = {})
        : min_supported_compute_capability_(cc), memory_pool_byte_size_(s)
    {
    }

    // The minimum compute capability of the supported devices.
    double min_supported_compute_capability_;

    // The size of CUDA memory reserved for the specified devices.
    // The memory size will be rounded up to align with
    // the default granularity (512 bytes).
    // No memory will be reserved for devices that are not listed.
    std::map<int, uint64_t> memory_pool_byte_size_;
  };

  ~CudaMemoryManager();

  // Create the memory manager based on the 'options' specified.
  // Return Status object indicating success or failure.
  static Status Create(const Options& options);

  // Allocate CUDA memory on GPU 'device_id' with the requested 'size'
  // and return the pointer in 'ptr'.
  // Return Status object indicating success or failure.
  static Status Alloc(void** ptr, uint64_t size, int64_t device_id);

  // Free the memory allocated by the memory manager on 'device_id'.
  // Return Status object indicating success or failure.
  static Status Free(void* ptr, int64_t device_id);

 protected:
  // Provide explicit control on the lifecycle of the CUDA memory manager,
  // for testing only.
  static void Reset();

 private:
  CudaMemoryManager(bool has_allocation) : has_allocation_(has_allocation) {}

  bool has_allocation_;
  static std::unique_ptr<CudaMemoryManager> instance_;
  static std::mutex instance_mu_;
};

}}  // namespace triton::core
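A minimal usage sketch of the singleton API declared above, assuming a single GPU 0 and an arbitrary 256 MiB pool size (both values are illustrative, and error handling is abbreviated):

// Illustrative only: reserve a 256 MiB pool on GPU 0, then allocate and free
// one buffer through the singleton declared above.
CudaMemoryManager::Options options(
    6.0 /* min compute capability */,
    {{0 /* GPU id */, 256 * 1024 * 1024 /* bytes */}});
Status status = CudaMemoryManager::Create(options);

void* buffer = nullptr;
if (status.IsOk()) {
  status = CudaMemoryManager::Alloc(&buffer, 4096 /* bytes */, 0 /* GPU id */);
}
if (status.IsOk()) {
  status = CudaMemoryManager::Free(buffer, 0 /* GPU id */);
}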
3rdparty/core-r22.12/src/cuda_utils.cc
deleted
100644 → 0
View file @
d592fbea
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cuda_utils.h"
#include "model_config_utils.h"
#include "triton/common/nvtx.h"
namespace triton { namespace core {

#ifdef TRITON_ENABLE_GPU
void CUDART_CB
MemcpyHost(void* args)
{
  auto* copy_params = reinterpret_cast<CopyParams*>(args);
  memcpy(copy_params->dst_, copy_params->src_, copy_params->byte_size_);
  delete copy_params;
}
#endif  // TRITON_ENABLE_GPU

Status
GetDeviceMemoryInfo(const int device_id, size_t* free, size_t* total)
{
  *free = 0;
  *total = 0;
#ifdef TRITON_ENABLE_GPU
  // Make sure that the correct device is set before creating the stream and
  // then restore the device to what was set by the caller.
  int current_device;
  auto cuerr = cudaGetDevice(&current_device);
  bool overridden = false;
  if (cuerr == cudaSuccess) {
    overridden = (current_device != device_id);
    if (overridden) {
      cuerr = cudaSetDevice(device_id);
    }
  }

  if (cuerr == cudaSuccess) {
    cuerr = cudaMemGetInfo(free, total);
  }

  if (overridden) {
    cudaSetDevice(current_device);
  }

  if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL,
        (std::string("unable to get memory info for device ") +
         std::to_string(device_id) + ": " + cudaGetErrorString(cuerr)));
  }
#endif  // TRITON_ENABLE_GPU
  return Status::Success;
}

Status
EnablePeerAccess(const double min_compute_capability)
{
#ifdef TRITON_ENABLE_GPU
  // If we can't enable peer access for one device pair, the best we can
  // do is skip it...
  std::set<int> supported_gpus;
  bool all_enabled = false;
  if (GetSupportedGPUs(&supported_gpus, min_compute_capability).IsOk()) {
    all_enabled = true;
    int can_access_peer = false;
    for (const auto& host : supported_gpus) {
      auto cuerr = cudaSetDevice(host);
      if (cuerr == cudaSuccess) {
        for (const auto& peer : supported_gpus) {
          if (host == peer) {
            continue;
          }
          cuerr = cudaDeviceCanAccessPeer(&can_access_peer, host, peer);
          if ((cuerr == cudaSuccess) && (can_access_peer == 1)) {
            cuerr = cudaDeviceEnablePeerAccess(peer, 0);
          }
          all_enabled &= ((cuerr == cudaSuccess) && (can_access_peer == 1));
        }
      }
    }
  }
  if (!all_enabled) {
    return Status(
        Status::Code::UNSUPPORTED,
        "failed to enable peer access for some device pairs");
  }
#endif  // TRITON_ENABLE_GPU
  return Status::Success;
}

Status
CopyBuffer(
    const std::string& msg, const TRITONSERVER_MemoryType src_memory_type,
    const int64_t src_memory_type_id,
    const TRITONSERVER_MemoryType dst_memory_type,
    const int64_t dst_memory_type_id, const size_t byte_size, const void* src,
    void* dst, cudaStream_t cuda_stream, bool* cuda_used, bool copy_on_stream)
{
  NVTX_RANGE(nvtx_, "CopyBuffer");

  *cuda_used = false;

  // For CUDA memcpy, all host-to-host copies are blocking with respect to
  // the host, so use memcpy() directly. In this case, we need to be careful
  // about whether the src buffer is valid.
  if ((src_memory_type != TRITONSERVER_MEMORY_GPU) &&
      (dst_memory_type != TRITONSERVER_MEMORY_GPU)) {
#ifdef TRITON_ENABLE_GPU
    if (copy_on_stream) {
      auto params = new CopyParams(dst, src, byte_size);
      cudaLaunchHostFunc(
          cuda_stream, MemcpyHost, reinterpret_cast<void*>(params));
      *cuda_used = true;
    } else {
      memcpy(dst, src, byte_size);
    }
#else
    memcpy(dst, src, byte_size);
#endif  // TRITON_ENABLE_GPU
  } else {
#ifdef TRITON_ENABLE_GPU
    RETURN_IF_CUDA_ERR(
        cudaMemcpyAsync(dst, src, byte_size, cudaMemcpyDefault, cuda_stream),
        msg + ": failed to perform CUDA copy");
    *cuda_used = true;
#else
    return Status(
        Status::Code::INTERNAL,
        msg + ": try to use CUDA copy while GPU is not supported");
#endif  // TRITON_ENABLE_GPU
  }

  return Status::Success;
}

void
CopyBufferHandler(
    const std::string& msg, const TRITONSERVER_MemoryType src_memory_type,
    const int64_t src_memory_type_id,
    const TRITONSERVER_MemoryType dst_memory_type,
    const int64_t dst_memory_type_id, const size_t byte_size, const void* src,
    void* dst, cudaStream_t cuda_stream, void* response_ptr,
    triton::common::SyncQueue<std::tuple<Status, bool, void*>>*
        completion_queue)
{
  bool cuda_used = false;
  Status status = CopyBuffer(
      msg, src_memory_type, src_memory_type_id, dst_memory_type,
      dst_memory_type_id, byte_size, src, dst, cuda_stream, &cuda_used);
  completion_queue->Put(std::make_tuple(status, cuda_used, response_ptr));
}

#ifdef TRITON_ENABLE_GPU
Status
CheckGPUCompatibility(const int gpu_id, const double min_compute_capability)
{
  // Query the compute capability from the device
  cudaDeviceProp cuprops;
  cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, gpu_id);
  if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL,
        "unable to get CUDA device properties for GPU ID " +
            std::to_string(gpu_id) + ": " + cudaGetErrorString(cuerr));
  }

  double compute_capability = cuprops.major + (cuprops.minor / 10.0);
  if ((compute_capability > min_compute_capability) ||
      (abs(compute_capability - min_compute_capability) < 0.01)) {
    return Status::Success;
  } else {
    return Status(
        Status::Code::UNSUPPORTED,
        "gpu " + std::to_string(gpu_id) + " has compute capability '" +
            std::to_string(cuprops.major) + "." +
            std::to_string(cuprops.minor) +
            "' which is less than the minimum supported of '" +
            std::to_string(min_compute_capability) + "'");
  }
}

Status
GetSupportedGPUs(
    std::set<int>* supported_gpus, const double min_compute_capability)
{
  // Make sure the set is empty before starting
  supported_gpus->clear();

  int device_cnt;
  cudaError_t cuerr = cudaGetDeviceCount(&device_cnt);
  if ((cuerr == cudaErrorNoDevice) || (cuerr == cudaErrorInsufficientDriver)) {
    device_cnt = 0;
  } else if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL, "unable to get number of CUDA devices: " +
                                    std::string(cudaGetErrorString(cuerr)));
  }

  // Populate supported_gpus
  for (int gpu_id = 0; gpu_id < device_cnt; gpu_id++) {
    Status status = CheckGPUCompatibility(gpu_id, min_compute_capability);
    if (status.IsOk()) {
      supported_gpus->insert(gpu_id);
    }
  }
  return Status::Success;
}

Status
SupportsIntegratedZeroCopy(const int gpu_id, bool* zero_copy_support)
{
  // Query the device to check if it is integrated
  cudaDeviceProp cuprops;
  cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, gpu_id);
  if (cuerr != cudaSuccess) {
    return Status(
        Status::Code::INTERNAL,
        "unable to get CUDA device properties for GPU ID " +
            std::to_string(gpu_id) + ": " + cudaGetErrorString(cuerr));
  }

  // Zero-copy is supported only on an integrated GPU that can map host memory
  if (cuprops.integrated && cuprops.canMapHostMemory) {
    *zero_copy_support = true;
  } else {
    *zero_copy_support = false;
  }

  return Status::Success;
}
#endif  // TRITON_ENABLE_GPU

}}  // namespace triton::core
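A brief usage sketch of CopyBuffer() from a caller's perspective; src_buffer, dst_buffer, byte_size, and stream are assumed to exist and are illustrative only, and error handling is abbreviated:

// Illustrative only: copy byte_size bytes from GPU 0 into pinned host memory
// via CopyBuffer(), then synchronize if an asynchronous CUDA copy was issued.
bool cuda_used = false;
Status status = CopyBuffer(
    "example tensor", TRITONSERVER_MEMORY_GPU, 0 /* src id */,
    TRITONSERVER_MEMORY_CPU_PINNED, 0 /* dst id */, byte_size, src_buffer,
    dst_buffer, stream, &cuda_used);
if (status.IsOk() && cuda_used) {
  // cudaMemcpyAsync was enqueued on 'stream', so wait for it to finish.
  cudaStreamSynchronize(stream);
}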