Commit 0a21fff9 authored by xiabo

Adapt to 0.1.0

parent 9484fd1c
---
BasedOnStyle: Google
IndentWidth: 2
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#cmake_minimum_required(VERSION 3.17)
cmake_minimum_required(VERSION 3.16)
project(tritonbackend LANGUAGES C CXX)
#
# Options
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend utilities" ON)
option(TRITON_ENABLE_MALI_GPU "Enable Arm MALI GPU support in backend utilities" OFF)
option(TRITON_ENABLE_STATS "Include statistics collections in backend utilities" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-common
GIT_REPOSITORY https://github.com/triton-inference-server/common.git
GIT_TAG ${TRITON_COMMON_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
repo-core
GIT_REPOSITORY https://github.com/triton-inference-server/core.git
GIT_TAG ${TRITON_CORE_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-common repo-core)
#
# CUDA
#
if(${TRITON_ENABLE_GPU})
#find_package(CUDAToolkit REQUIRED)
find_package(CUDA REQUIRED)
message(STATUS "Using CUDA ${CUDA_VERSION}")
set(CUDA_NVCC_FLAGS -std=c++11)
if(CUDA_VERSION VERSION_GREATER "10.1" OR CUDA_VERSION VERSION_EQUAL "10.1")
add_definitions(-DTRITON_ENABLE_CUDA_GRAPH=1)
else()
message(WARNING "CUDA ${CUDA_VERSION} does not support CUDA graphs.")
endif()
endif() # TRITON_ENABLE_GPU
#
# Backend library containing useful source and utilities
#
set(SRC_FILES
"src/backend_common.cc"
"src/backend_input_collector.cc"
"src/backend_memory.cc"
"src/backend_model_instance.cc"
"src/backend_model.cc"
"src/backend_output_responder.cc"
)
if(${TRITON_ENABLE_GPU})
set(SRC_FILES ${SRC_FILES} "src/kernel.h")
endif() # TRITON_ENABLE_GPU
add_library(
triton-backend-utils
${SRC_FILES}
)
if(${TRITON_ENABLE_GPU})
set(HOST_COMPILER_FLAGS "")
if (WIN32)
set(HOST_COMPILER_FLAGS "/MD")
else()
set(HOST_COMPILER_FLAGS "-fPIC")
endif()
set(CUDA_LIBRARIES PUBLIC ${CUDA_LIBRARIES})
cuda_add_library(
kernel-library-new
src/kernel.cu src/kernel.h
OPTIONS -arch compute_53
OPTIONS -code compute_53,sm_53,sm_60,sm_61,sm_62,sm_70,sm_72,sm_75
OPTIONS -Xcompiler ${HOST_COMPILER_FLAGS}
)
endif() # TRITON_ENABLE_GPU
add_library(
TritonBackend::triton-backend-utils ALIAS triton-backend-utils
)
target_include_directories(
triton-backend-utils
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
message("Using MSVC as compiler, default target on Windows 10. "
"If the target system is not Windows 10, please update _WIN32_WINNT "
"to corresponding value.")
endif()
target_compile_features(triton-backend-utils PRIVATE cxx_std_11)
target_compile_options(
triton-backend-utils
PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc>
)
# TRITON_ENABLE_GPU exposed in header so set PUBLIC
if(${TRITON_ENABLE_GPU})
target_compile_definitions(
triton-backend-utils
PUBLIC TRITON_ENABLE_GPU=1
)
endif() # TRITON_ENABLE_GPU
# TRITON_ENABLE_MALI_GPU exposed in header so set PUBLIC
if(${TRITON_ENABLE_MALI_GPU})
target_compile_definitions(
triton-backend-utils
PUBLIC TRITON_ENABLE_MALI_GPU=1
)
endif() # TRITON_ENABLE_MALI_GPU
# TRITON_ENABLE_STATS exposed in header so set PUBLIC
if(${TRITON_ENABLE_STATS})
target_compile_definitions(
triton-backend-utils
PUBLIC TRITON_ENABLE_STATS=1
)
endif() # TRITON_ENABLE_STATS
set_target_properties(
triton-backend-utils PROPERTIES
WINDOWS_EXPORT_ALL_SYMBOLS TRUE
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME tritonbackendutils
)
target_link_libraries(
triton-backend-utils
PUBLIC
triton-core-backendapi # from repo-core
triton-core-serverapi # from repo-core
triton-common-async-work-queue # from repo-common
triton-common-json # from repo-common
)
if(${TRITON_ENABLE_GPU})
target_link_libraries(
triton-backend-utils
PUBLIC
#CUDA::cudart
cudart
PRIVATE
kernel-library-new
)
endif() # TRITON_ENABLE_GPU
#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonBackend)
install(
TARGETS
triton-backend-utils
EXPORT
triton-backend-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
if(${TRITON_ENABLE_GPU})
install(
TARGETS
kernel-library-new
EXPORT
triton-backend-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
endif() # TRITON_ENABLE_GPU
install(
DIRECTORY include/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(
EXPORT
triton-backend-targets
FILE
TritonBackendTargets.cmake
NAMESPACE
TritonBackend::
DESTINATION
${INSTALL_CONFIGDIR}
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/TritonBackendConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/TritonBackendConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/TritonBackendConfig.cmake
DESTINATION ${INSTALL_CONFIGDIR}
)
#
# Export from build tree
#
export(
EXPORT triton-backend-targets
FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonBackendTargets.cmake
NAMESPACE TritonBackend::
)
export(PACKAGE TritonBackend)
Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
<!--
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
# Triton Inference Server Backend
A Triton *backend* is the implementation that executes a model. A
backend can be a wrapper around a deep-learning framework, like
PyTorch, TensorFlow, TensorRT or ONNX Runtime. Or a backend can be
custom C/C++ logic performing any operation (for example, image
pre-processing).
This repo contains documentation on Triton backends and also source,
scripts and utilities for creating Triton backends. You do not need to
use anything provided in this repo to create a Triton backend but you
will likely find its contents useful.
## Frequently Asked Questions
Full documentation is included below but these shortcuts can help you
get started in the right direction.
### Where can I ask general questions about Triton and Triton backends?
Be sure to read all the information below as well as the [general
Triton
documentation](https://github.com/triton-inference-server/server#triton-inference-server)
available in the main
[server](https://github.com/triton-inference-server/server) repo. If
you don't find your answer there you can ask questions on the main
Triton [issues
page](https://github.com/triton-inference-server/server/issues).
### Where can I find all the backends that are available for Triton?
Anyone can develop a Triton backend, so it isn't possible for us to
know about all available backends. But the Triton project does provide
a set of supported backends that are tested and updated with each
Triton release.
**TensorRT**: The TensorRT backend is used to execute TensorRT
models. The
[tensorrt_backend](https://github.com/triton-inference-server/tensorrt_backend)
repo contains the documentation and source for the backend.
**ONNX Runtime**: The ONNX Runtime backend is used to execute ONNX
models. The
[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend)
repo contains the documentation and source for the backend.
**TensorFlow**: The TensorFlow backend is used to execute TensorFlow
models in both GraphDef and SavedModel formats. The same backend is
used to execute both TensorFlow 1 and TensorFlow 2 models. The
[tensorflow_backend](https://github.com/triton-inference-server/tensorflow_backend)
repo contains the documentation and source for the backend.
**PyTorch**: The PyTorch backend is used to execute TorchScript
models. The
[pytorch_backend](https://github.com/triton-inference-server/pytorch_backend)
repo contains the documentation and source for the backend.
**OpenVINO**: The OpenVINO backend is used to execute
[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html)
models. The
[openvino_backend](https://github.com/triton-inference-server/openvino_backend)
repo contains the documentation and source for the backend.
**Python**: The Python backend allows you to write your model logic in
Python. For example, you can use this backend to execute pre/post
processing code written in Python, or to execute a PyTorch Python
script directly (instead of first converting it to TorchScript and
then using the PyTorch backend). The
[python_backend](https://github.com/triton-inference-server/python_backend)
repo contains the documentation and source for the backend.
**DALI**: [DALI](https://github.com/NVIDIA/DALI) is a collection of
highly optimized building blocks and an execution engine that
accelerates the pre-processing of the input data for deep learning
applications. The DALI backend allows you to execute your DALI
pipeline within Triton. The
[dali_backend](https://github.com/triton-inference-server/dali_backend)
repo contains the documentation and source for the backend.
**FIL**: The FIL ([Forest Inference
Library](https://github.com/rapidsai/cuml/tree/branch-21.10/python/cuml/fil))
backend is used to execute a variety of tree-based ML models, including
XGBoost models, LightGBM models, Scikit-Learn random forest models, and cuML
random forest models. The
[fil_backend](https://github.com/triton-inference-server/fil_backend) repo
contains the documentation and source for the backend.
**Important Note!** Not all the above backends are supported on every platform
supported by Triton. Look at the
[Backend-Platform Support Matrix](docs/backend_platform_support_matrix.md)
to see which backends are supported on each platform.
### How can I develop my own Triton backend?
First you probably want to ask on the main Triton [issues
page](https://github.com/triton-inference-server/server/issues) to
make sure you are not duplicating a backend that already exists. Then
follow the [tutorial](examples/README.md) to learn how to create your
first simple Triton backend and incrementally improve it to add more
features. You should also read the complete documentation on [Triton
backends](#backends).
### Can I add (or remove) a backend to an existing Triton installation?
Yes. See [Backend Shared Library](#backend-shared-library) for general
information about how the shared library implementing a backend is
managed by Triton, and [Triton with Unsupported and Custom
Backends](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/compose.md#triton-with-unsupported-and-custom-backends)
for documentation on how to add your backend to the released Triton
Docker image. For a standard install the globally available backends
are in /opt/tritonserver/backends.
### What about backends developed using the "legacy custom backend" API?
The legacy custom API is removed from Triton. If you have custom
backends that you developed using this older API you must port them to
the new [Triton Backend API](#triton-backend-api).
## Backends
A Triton *backend* is the implementation that executes a model. A
backend can be a wrapper around a deep-learning framework, like
PyTorch, TensorFlow, TensorRT, ONNX Runtime or OpenVINO. A backend can
also implement any functionality you want as long as it adheres to the
[backend API](#triton-backend-api). Triton uses this API to send
requests to the backend for execution and the backend uses the API to
communicate with Triton.
Every model must be associated with a backend. A model's backend is
specified in the model's configuration using the 'backend' setting.
To use the TensorRT backend, the value of this setting should be *tensorrt*.
Similarly, to use the PyTorch, ONNX Runtime or TensorFlow backends, the
`backend` field should be set to *pytorch*, *onnxruntime* or *tensorflow*,
respectively. For all other backends, 'backend' must be set to the name of
the backend.
### Backend Shared Library
Each backend must be implemented as a shared library and the name of
the shared library must be *libtriton_\<backend-name\>.so*. For
example, if the name of the backend is "mybackend", a model indicates
that it uses the backend by setting the model configuration 'backend'
setting to "mybackend", and Triton looks for *libtriton_mybackend.so*
as the shared library that implements the backend. The
[tutorial](examples/README.md) shows examples of how to build your
backend logic into the appropriate shared library.
For a model *M* that specifies backend *B*, Triton searches for the
backend shared library in the following places, in this order:
* \<model_repository\>/M/\<version_directory\>/libtriton_B.so
* \<model_repository\>/M/libtriton_B.so
* \<global_backend_directory\>/B/libtriton_B.so
Where \<global_backend_directory\> is by default
/opt/tritonserver/backends. The --backend-directory flag can be used
to override the default.
Typically you will install your backend into the global backend
directory. For example, if using Triton Docker images you can follow
the instructions in [Triton with Unsupported and Custom
Backends](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/compose.md#triton-with-unsupported-and-custom-backends). Continuing
the example of a backend named "mybackend", you would install into the
Triton image as:
```
/opt/
  tritonserver/
    backends/
      mybackend/
        libtriton_mybackend.so
        ... # other files needed by mybackend
```
### Triton Backend API
A Triton backend must implement the C interface defined in
[tritonbackend.h](https://github.com/triton-inference-server/core/tree/main/include/triton/core/tritonbackend.h). The
following abstractions are used by the API.
#### TRITONBACKEND_Backend
A TRITONBACKEND_Backend object represents the backend itself. The
same backend object is shared across all models that use the
backend. The associated API, like TRITONBACKEND_BackendName, is used
to get information about the backend and to associate a user-defined
state with the backend.
A backend can optionally implement TRITONBACKEND_Initialize and
TRITONBACKEND_Finalize to get notification of when the backend object
is created and destroyed (for more information see [backend
lifecycles](#backend-lifecycles)).
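As a rough sketch of how these hooks can fit together (this is not the
tutorial's actual implementation; it assumes a hypothetical user-defined
BackendState type, the RETURN_IF_ERROR macro from this repo's backend
utilities, and the usual includes of tritonbackend.h and
backend_common.h):
```
// Hypothetical user-defined state attached to the backend object.
struct BackendState {
  std::string version_info;
};

extern "C" TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* name;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &name));

  // Verify that the backend API version used at compile time is
  // compatible with the Triton that is loading this backend.
  uint32_t api_major, api_minor;
  RETURN_IF_ERROR(TRITONBACKEND_ApiVersion(&api_major, &api_minor));
  if (api_major != TRITONBACKEND_API_VERSION_MAJOR) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED, "triton backend API version mismatch");
  }

  // Associate user-defined state with the backend object; it is freed
  // in TRITONBACKEND_Finalize.
  RETURN_IF_ERROR(TRITONBACKEND_BackendSetState(backend, new BackendState()));
  return nullptr;  // success
}

extern "C" TRITONSERVER_Error*
TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend)
{
  void* state;
  RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &state));
  delete reinterpret_cast<BackendState*>(state);
  return nullptr;  // success
}
```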
#### TRITONBACKEND_Model
A TRITONBACKEND_Model object represents a model. Each model loaded by
Triton is associated with a TRITONBACKEND_Model. Each model can use
the TRITONBACKEND_ModelBackend API to get the backend object
representing the backend that is used by the model.
The same model object is shared across all instances of that
model. The associated API, like TRITONBACKEND_ModelName, is used to
get information about the model and to associate a user-defined state
with the model.
Most backends will implement TRITONBACKEND_ModelInitialize and
TRITONBACKEND_ModelFinalize to initialize the backend for a given
model and to manage the user-defined state associated with the model
(for more information see [backend lifecycles](#backend-lifecycles)).
The backend must take into account threading concerns when
implementing TRITONBACKEND_ModelInitialize and
TRITONBACKEND_ModelFinalize. Triton will not perform multiple
simultaneous calls to these functions for a given model; however, if a
backend is used by multiple models Triton may simultaneously call the
functions with a different thread for each model. As a result, the
backend must be able to handle multiple simultaneous calls to the
functions. Best practice for backend implementations is to use only
function-local and model-specific user-defined state in these
functions, as is shown in the [tutorial](examples/README.md).
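A hedged sketch of that pattern, again assuming a hypothetical
user-defined ModelState type and the RETURN_IF_ERROR utility (the
tutorial backends implement a richer version of the same idea):
```
// Hypothetical user-defined, model-specific state.
struct ModelState {
  std::string name;
  uint64_t version;
};

extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  const char* name;
  uint64_t version;
  RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &name));
  RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &version));

  // Keep state model-specific by attaching it to this model object only.
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelSetState(model, new ModelState{name, version}));
  return nullptr;  // success
}

extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &state));
  delete reinterpret_cast<ModelState*>(state);
  return nullptr;  // success
}
```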
#### TRITONBACKEND_ModelInstance
A TRITONBACKEND_ModelInstance object represents a model
*instance*. Triton creates one or more instances of the model based on
the *instance_group* settings specified in the model
configuration. Each of these instances is associated with a
TRITONBACKEND_ModelInstance object.
The only function that the backend must implement is
TRITONBACKEND_ModelInstanceExecute. The
TRITONBACKEND_ModelInstanceExecute function is called by Triton to
perform inference/computation on a batch of inference requests. Most
backends will also implement TRITONBACKEND_ModelInstanceInitialize
and TRITONBACKEND_ModelInstanceFinalize to initialize the backend for
a given model instance and to manage the user-defined state associated
with the model instance (for more information see [backend
lifecycles](#backend-lifecycles)).
The backend must take into account threading concerns when
implementing TRITONBACKEND_ModelInstanceInitialize,
TRITONBACKEND_ModelInstanceFinalize and
TRITONBACKEND_ModelInstanceExecute. Triton will not perform multiple
simultaneous calls to these functions for a given model instance;
however, if a backend is used by a model with multiple instances or by
multiple models Triton may simultaneously call the functions with a
different thread for each model instance. As a result, the backend
must be able to handle multiple simultaneous calls to the
functions. Best practice for backend implementations is to use only
function-local and model-specific user-defined state in these
functions, as is shown in the [tutorial](examples/README.md).
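For example, a backend might use TRITONBACKEND_ModelInstanceInitialize
to discover where Triton placed the instance. A minimal sketch,
assuming the LOG_MESSAGE and RETURN_IF_ERROR utilities from this repo:
```
extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  const char* name;
  TRITONSERVER_InstanceGroupKind kind;
  int32_t device_id;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceName(instance, &name));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &kind));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device_id));

  // Log which device (GPU or CPU) this instance will use.
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("instance ") + name + ": " +
       TRITONSERVER_InstanceGroupKindString(kind) + " device " +
       std::to_string(device_id))
          .c_str());
  return nullptr;  // success
}
```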
#### TRITONBACKEND_Request
A TRITONBACKEND_Request object represents an inference request made
to the model. The backend takes ownership of the request object(s) in
TRITONBACKEND_ModelInstanceExecute and must release each request by
calling TRITONBACKEND_RequestRelease. However, the ownership of request
object is returned back to Triton in case TRITONBACKEND_ModelInstanceExecute
returns an error. See [Inference Requests and Responses](#inference-requests-and-responses)
for more information about request lifecycle.
The Triton Backend API allows the backend to get information about the
request as well as the input tensors and requested output tensors of
the request. Each request input is represented by a TRITONBACKEND_Input
object.
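For illustration, the inputs and requested outputs of a request can be
enumerated as in the following sketch (it assumes a request obtained
inside TRITONBACKEND_ModelInstanceExecute and the RETURN_IF_ERROR
utility; headers are omitted):
```
// Enumerate the input tensors of 'request'.
uint32_t input_count;
RETURN_IF_ERROR(TRITONBACKEND_RequestInputCount(request, &input_count));
for (uint32_t i = 0; i < input_count; ++i) {
  TRITONBACKEND_Input* input;
  RETURN_IF_ERROR(TRITONBACKEND_RequestInputByIndex(request, i, &input));

  const char* name;
  TRITONSERVER_DataType datatype;
  const int64_t* shape;
  uint32_t dims_count;
  uint64_t byte_size;
  uint32_t buffer_count;
  RETURN_IF_ERROR(TRITONBACKEND_InputProperties(
      input, &name, &datatype, &shape, &dims_count, &byte_size,
      &buffer_count));
  // The tensor contents may be split across 'buffer_count' buffers,
  // each retrieved with TRITONBACKEND_InputBuffer.
}

// Enumerate the output tensors requested by 'request'.
uint32_t output_count;
RETURN_IF_ERROR(TRITONBACKEND_RequestOutputCount(request, &output_count));
for (uint32_t i = 0; i < output_count; ++i) {
  const char* output_name;
  RETURN_IF_ERROR(TRITONBACKEND_RequestOutputName(request, i, &output_name));
}
```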
#### TRITONBACKEND_Response
A TRITONBACKEND_Response object represents a response sent by the
backend for a specific request. The backend uses the response API to
set the name, shape, datatype and tensor values for each output tensor
included in the response. The response can indicate either a failed or
a successful request. See [Inference Requests and
Responses](#inference-requests-and-responses) for more information
about request-response lifecycle.
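For example, a failed request can be reported by passing a
TRITONSERVER_Error to TRITONBACKEND_ResponseSend. A sketch following
the pattern used by the tutorial backends (it assumes `response` was
created for the failing request with TRITONBACKEND_ResponseNew and that
LOG_IF_ERROR comes from the backend utilities):
```
// Report a failure for this request by sending an error response.
TRITONSERVER_Error* err = TRITONSERVER_ErrorNew(
    TRITONSERVER_ERROR_INTERNAL, "inference failed for this request");
LOG_IF_ERROR(
    TRITONBACKEND_ResponseSend(
        response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, err),
    "failed to send error response");
TRITONSERVER_ErrorDelete(err);
```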
### Backend Lifecycles
A backend must carefully manage the lifecycle of the backend itself,
the models and model instances that use the backend and the inference
requests that execute on the model instances using the backend.
#### Backend and Model
Backend, model and model instance initialization is triggered when
Triton loads a model.
* If the model requires a backend that is not already in use by an
  already loaded model, then:
  * Triton [loads the shared library](#backend-shared-library) that
    implements the backend required by the model.
  * Triton creates the TRITONBACKEND_Backend object that represents
    the backend.
  * Triton calls TRITONBACKEND_Initialize if it is implemented in the
    backend shared library. TRITONBACKEND_Initialize should not return
    until the backend is completely initialized. If
    TRITONBACKEND_Initialize returns an error, Triton will report that
    the model failed to load.
* Triton creates the TRITONBACKEND_Model object that represents the
  model. Triton calls TRITONBACKEND_ModelInitialize if it is
  implemented in the backend shared library.
  TRITONBACKEND_ModelInitialize should not return until the backend
  is completely initialized for the model. If
  TRITONBACKEND_ModelInitialize returns an error, Triton will show
  that the model failed to load.
* For each model instance specified for the model in the model
  configuration:
  * Triton creates the TRITONBACKEND_ModelInstance object that
    represents the model instance.
  * Triton calls TRITONBACKEND_ModelInstanceInitialize if it is
    implemented in the backend shared library.
    TRITONBACKEND_ModelInstanceInitialize should not return until the
    backend is completely initialized for the instance. If
    TRITONBACKEND_ModelInstanceInitialize returns an error, Triton
    will show that the model failed to load.
Backend, model and model instance finalization is triggered when
Triton unloads a model.
* For each model instance:
  * Triton calls TRITONBACKEND_ModelInstanceFinalize if it is
    implemented in the backend shared library.
    TRITONBACKEND_ModelInstanceFinalize should not return until the
    backend is completely finalized, including stopping any threads
    created for the model instance and freeing any user-defined state
    created for the model instance.
  * Triton destroys the TRITONBACKEND_ModelInstance object that
    represents the model instance.
* Triton calls TRITONBACKEND_ModelFinalize if it is implemented in the
  backend shared library. TRITONBACKEND_ModelFinalize should not
  return until the backend is completely finalized, including stopping
  any threads created for the model and freeing any user-defined state
  created for the model.
* Triton destroys the TRITONBACKEND_Model object that represents the
  model.
* Even if no other loaded model requires the backend, Triton does not
  finalize and unload the backend until the tritonserver process is
  exiting. When the tritonserver process exits:
  * Triton calls TRITONBACKEND_Finalize if it is implemented in the
    backend shared library. TRITONBACKEND_Finalize should not
    return until the backend is completely finalized, including
    stopping any threads created for the backend and freeing any
    user-defined state created for the backend.
  * Triton destroys the TRITONBACKEND_Backend object that represents
    the backend.
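The lifecycle above maps onto a small set of C entry points that Triton
resolves from the backend shared library. As a reference sketch, these
are their signatures (modulo export macros) as declared in
tritonbackend.h; only TRITONBACKEND_ModelInstanceExecute is required,
the rest are optional:
```
// Optional: backend-level lifecycle.
TRITONSERVER_Error* TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend);
TRITONSERVER_Error* TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend);

// Optional: model-level lifecycle.
TRITONSERVER_Error* TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model);
TRITONSERVER_Error* TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model);

// Optional: model-instance-level lifecycle.
TRITONSERVER_Error* TRITONBACKEND_ModelInstanceInitialize(
    TRITONBACKEND_ModelInstance* instance);
TRITONSERVER_Error* TRITONBACKEND_ModelInstanceFinalize(
    TRITONBACKEND_ModelInstance* instance);

// Required: execute a batch of requests on a model instance.
TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count);
```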
#### Inference Requests and Responses
Triton calls TRITONBACKEND_ModelInstanceExecute to execute inference
requests on a model instance. Each call to
TRITONBACKEND_ModelInstanceExecute communicates a batch of requests
to execute and the instance of the model that should be used to
execute those requests. The backend should not allow the caller
thread to return from TRITONBACKEND_ModelInstanceExecute until that
instance is ready to handle another set of requests. Typically this
means that the TRITONBACKEND_ModelInstanceExecute function will
create responses and release the requests before returning. However,
in case TRITONBACKEND_ModelInstanceExecute returns an error, the ownership
of requests is transferred back to Triton which will then be responsible
for releasing them. Therefore, in the case where TRITONBACKEND_ModelInstanceExecute
returns an error, the backend must not retain references to the requests
or access them in any way. For more detailed description of request/response
lifetimes, study the documentation of TRITONBACKEND_ModelInstanceExecute in
[tritonbackend.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonbackend.h).
##### Single Response
Most backends will create a single response for each request. For that
kind of backend, executing a single inference request requires the
following steps:
* Create a response for the request using TRITONBACKEND_ResponseNew.
* For each request input tensor use TRITONBACKEND_InputProperties to
get shape and datatype of the input as well as the buffer(s)
containing the tensor contents.
* For each output tensor which the request expects to be returned, use
TRITONBACKEND_ResponseOutput to create the output tensor of the
required datatype and shape. Use TRITONBACKEND_OutputBuffer to get a
pointer to the buffer where the tensor's contents should be written.
* Use the inputs to perform the inference computation that produces
the requested output tensor contents into the appropriate output
buffers.
* Optionally set parameters in the response.
* Send the response using TRITONBACKEND_ResponseSend.
* Release the request using TRITONBACKEND_RequestRelease.
For a batch of requests the backend should attempt to combine the
execution of the individual requests as much as possible to increase
performance.
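The steps above can be sketched in code roughly as follows, for one
request of the batch passed to TRITONBACKEND_ModelInstanceExecute. The
sketch assumes a model with a single input "IN0" copied to a single
output "OUT0" (as in the tutorial's *minimal* backend), CPU buffers,
the RETURN_IF_ERROR utility, and omits headers; a real backend must
also convert per-request errors into error responses rather than
simply returning.
```
// Create a response for this request.
TRITONBACKEND_Response* response;
RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));

// Get the properties of the "IN0" input tensor.
TRITONBACKEND_Input* input;
RETURN_IF_ERROR(TRITONBACKEND_RequestInput(request, "IN0", &input));
const char* input_name;
TRITONSERVER_DataType datatype;
const int64_t* shape;
uint32_t dims_count;
uint64_t byte_size;
uint32_t buffer_count;
RETURN_IF_ERROR(TRITONBACKEND_InputProperties(
    input, &input_name, &datatype, &shape, &dims_count, &byte_size,
    &buffer_count));

// Create the "OUT0" output with the same datatype and shape, and get a
// buffer to write its contents into.
TRITONBACKEND_Output* output;
RETURN_IF_ERROR(TRITONBACKEND_ResponseOutput(
    response, &output, "OUT0", datatype, shape, dims_count));
void* output_buffer;
TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
int64_t output_memory_type_id = 0;
RETURN_IF_ERROR(TRITONBACKEND_OutputBuffer(
    output, &output_buffer, byte_size, &output_memory_type,
    &output_memory_type_id));

// The "inference": copy the input buffers into the output buffer.
size_t offset = 0;
for (uint32_t b = 0; b < buffer_count; ++b) {
  const void* input_buffer;
  uint64_t input_buffer_byte_size;
  TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU;
  int64_t input_memory_type_id = 0;
  RETURN_IF_ERROR(TRITONBACKEND_InputBuffer(
      input, b, &input_buffer, &input_buffer_byte_size, &input_memory_type,
      &input_memory_type_id));
  memcpy(
      static_cast<char*>(output_buffer) + offset, input_buffer,
      input_buffer_byte_size);
  offset += input_buffer_byte_size;
}

// Send the single, final response and release the request.
RETURN_IF_ERROR(TRITONBACKEND_ResponseSend(
    response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr /* success */));
RETURN_IF_ERROR(
    TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL));
```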
##### Decoupled Responses
It is also possible for a backend to send multiple responses for a
request or not send any responses for a request. A backend may also
send responses out-of-order relative to the order that the request
batches are executed. Such backends are called *decoupled* backends.
Decoupled backends use one `ResponseFactory` object per request to
create and send any number of responses for that request. For this
kind of backend, executing a single inference request typically requires
the following steps:
* For each request input tensor use TRITONBACKEND_InputProperties to
get shape and datatype of the input as well as the buffer(s)
containing the tensor contents.
* Create a `ResponseFactory` object for the request using
TRITONBACKEND_ResponseFactoryNew.
1. Create a response from the `ResponseFactory` object using
TRITONBACKEND_ResponseNewFromFactory. As long as you hold the
`ResponseFactory` object you can continue creating responses.
2. For each output tensor which the request expects to be returned, use
TRITONBACKEND_ResponseOutput to create the output tensor of the
required datatype and shape. Use TRITONBACKEND_OutputBuffer to get a
pointer to the buffer where the tensor's contents should be written.
3. Use the inputs to perform the inference computation that produces
the requested output tensor contents into the appropriate output
buffers.
4. Optionally set parameters in the response.
5. Send the response using TRITONBACKEND_ResponseSend. If this is the
last response for the request then use the
TRITONSERVER_RESPONSE_COMPLETE_FINAL flag with
TRITONBACKEND_ResponseSend. Otherwise continue with Step 1 to send
the next response.
* Release the request using TRITONBACKEND_RequestRelease.
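A condensed sketch of the decoupled flow for one request, sending a
hypothetical number of responses num_responses (output creation is
elided; it is the same as in the single-response case, and
RETURN_IF_ERROR is again assumed from the backend utilities):
```
// Create a response factory for this request.
TRITONBACKEND_ResponseFactory* factory;
RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryNew(&factory, request));

for (uint32_t i = 0; i < num_responses; ++i) {
  TRITONBACKEND_Response* response;
  RETURN_IF_ERROR(TRITONBACKEND_ResponseNewFromFactory(&response, factory));

  // ... create outputs and fill output buffers with
  // TRITONBACKEND_ResponseOutput / TRITONBACKEND_OutputBuffer ...

  // Only the last response for the request carries the FINAL flag.
  const uint32_t flags =
      (i + 1 == num_responses) ? TRITONSERVER_RESPONSE_COMPLETE_FINAL : 0;
  RETURN_IF_ERROR(TRITONBACKEND_ResponseSend(response, flags, nullptr));
}

RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryDelete(factory));

// Release the request once its input buffers are no longer needed.
RETURN_IF_ERROR(
    TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL));
```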
###### Special Cases
The decoupled API is powerful and supports various special cases:
* If the backend should not send any response for the request,
TRITONBACKEND_ResponseFactorySendFlags can be used to send
TRITONSERVER_RESPONSE_COMPLETE_FINAL using the `ResponseFactory`.
* The model can also send responses out of the order in which it
received the requests.
* The backend can copy out the contents of the input buffer(s) if the
request must be released before the contents are completely
consumed to generate responses. After the copy, the request can be
released at any time before exiting TRITONBACKEND_ModelInstanceExecute.
The copies and the `ResponseFactory` object can be passed to a separate
thread in the backend. This means the main caller thread can exit from
TRITONBACKEND_ModelInstanceExecute and the backend can still continue
generating responses as long as it holds the `ResponseFactory` object.
The [repeat example](examples/README.md) demonstrates the full power of
what can be achieved with the decoupled API.
Study the documentation of these TRITONBACKEND_* functions in
[tritonbackend.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonbackend.h)
for more details on these APIs. Read
[Decoupled Backends and Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md)
for more details on how to host a decoupled model.
## Build the Backend Utilities
The source in this repo builds into a single "backend utilities"
library that is useful when building backends. You don't need to use
these utilities but they will be helpful for most backends.
Typically you don't need to build this repo directly but instead you
can include it in the build of your backend as is shown in the
CMakeLists.txt files of the [tutorial examples](examples/README.md).
To build and install in a local directory use the following commands.
```
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ..
$ make install
```
The following required Triton repositories will be pulled and used in
the build. By default the "main" branch/tag will be used for each repo
but the listed CMake argument can be used to override.
* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag]
* triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag]
See the [CMakeLists.txt](CMakeLists.txt) file for other build options.
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONBACKEND_CMAKE_DIR})
if(NOT TARGET TritonBackend::triton-backend-utils)
include("${TRITONBACKEND_CMAKE_DIR}/TritonBackendTargets.cmake")
endif()
set(TRITONBACKEND_LIBRARIES TritonBackend::triton-backend-utils)
<!--
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# Backend-Platform Support Matrix
Triton supports inference across various platforms such as cloud,
data center, edge and embedded devices on NVIDIA GPUs, x86 and ARM
CPUs, or AWS Inferentia, but it does so by relying on its backends.
Not all Triton backends support every platform. The purpose of this
document is to describe which compute platforms are supported by each
of these Triton backends.
GPU in this document refers to Nvidia GPU. See
[GPU, Driver, and CUDA Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
to learn more about supported GPUs.
## Ubuntu 20.04
The table below describes target device(s) supported for inference by
each backend on different platforms.
| Backend | x86 | ARM-SBSA |
| ------------ | --------- | ------------- |
| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU | :heavy_check_mark: GPU <br/> :x: CPU |
| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| TensorFlow | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| PyTorch | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| OpenVINO | :x: GPU <br/> :heavy_check_mark: CPU | :x: GPU <br/> :x: CPU |
| Python[^1] | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| DALI | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU[^2] <br/> :heavy_check_mark: CPU[^2] |
| FIL | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | Unsupported |
## Windows 10
Only TensorRT and ONNX Runtime backends are supported on Windows.
| Backend | x86 | ARM-SBSA |
| ------------ | --------- | ------------- |
| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU | :heavy_check_mark: GPU <br/> :x: CPU |
| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
## Jetson JetPack
The following backends are currently supported on Jetson JetPack:
| Backend | Jetson |
| ------------ | --------- |
| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU |
| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| TensorFlow | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| PyTorch | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| Python[^1] | :x: GPU <br/> :heavy_check_mark: CPU |
See [Triton Inference Server Support for Jetson and JetPack](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/jetson.md) for more details.
## AWS Inferentia
Currently, inference on AWS Inferentia is only supported via the
[Python backend](https://github.com/triton-inference-server/python_backend#running-with-inferentia),
where the deployed Python script invokes the AWS Neuron SDK.
[^1]: The supported devices for the Python backend are listed with
respect to Triton. The Python script running in the Python backend can
execute inference on any hardware for which Python APIs are available;
AWS Inferentia is one such example. Triton core is largely unaware
that inference will run on Inferentia.
[^2]: On ARM-SBSA, some operations are not fully supported.
<!--
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
# Triton Example Backends
To learn how to create a Triton backend, and to see a best-practices
baseline onto which you can add your own backend logic, follow the
[Tutorial](#tutorial).
Triton also provides a couple of example backends that demonstrate
specific aspects of the backend API not covered by the
[Tutorial](#tutorial).
* The
[*repeat*](https://github.com/triton-inference-server/repeat_backend)
backend shows a more advanced example of how a backend can produce
multiple responses per request.
* The
[*stateful*](https://github.com/triton-inference-server/stateful_backend)
backend shows an example of how a backend can manage model state
tensors on the server-side for the [sequence
batcher](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#sequence-batcher)
to avoid transferring state tensors between client and server. Triton
also implements [Implicit State
Management](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/architecture.md#implicit-state-management)
which allows backends to behave in a stateless manner and leave the
state management to Triton.
## Tutorial
The [Triton Backend API](../README.md#triton-backend-api) exposes a
large number of features. The backend utilities and classes provide
many functions commonly used when creating a backend. But to create a
functional backend it is not necessary to use most of the backend API
or utilities. The tutorial starts with an implementation that shows a
*minimal* backend and then adds on recommended and optional
enhancements. The tutorial implementations follow best practices for
Triton backends and so can be used as templates for your own backend.
### *Minimal* Triton Backend
The source code for the *minimal* backend is contained in
[minimal.cc](backends/minimal/src/minimal.cc). The source code
contains extensive documentation describing the operation of the
backend and the use of the [Triton Backend
API](../README.md#triton-backend-api) and the backend
utilities. Before reading the source code, make sure you understand
the concepts associated with Triton backend abstractions
[TRITONBACKEND_Backend](../README.md#tritonbackend_backend),
[TRITONBACKEND_Model](../README.md#tritonbackend_model), and
[TRITONBACKEND_ModelInstance](../README.md#tritonbackend_modelinstance).
The *minimal* backend does not perform any interesting operation; it
simply copies a single input tensor to a single output tensor. But it
does demonstrate the basic organization required for a Triton backend.
The *minimal* backend is complete but for clarity leaves out some
important aspects of writing a full-featured backend that are
described in [*Recommended* Triton
Backend](#recommended-triton-backend). When creating your own backend
use the [*Recommended* Triton Backend](#recommended-triton-backend) as
a starting point.
#### Building the *Minimal* Backend
[backends/minimal/CMakeLists.txt](backends/minimal/CMakeLists.txt)
shows the recommended build and install script for a Triton
backend. To build the *minimal* backend and install in a local directory
use the following commands.
```
$ cd backends/minimal
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ..
$ make install
```
The following required Triton repositories will be pulled and used in
the build. By default the "main" branch/tag will be used for each repo
but the listed CMake argument can be used to override.
* triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag]
* triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag]
* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag]
If you are building on a release branch (or on a development branch
that is based off of a release branch), then you must set these cmake
arguments to point to that release branch as well. For example, if you
are building the r21.10 identity_backend branch then you need to use
the following additional cmake flags:
```
-DTRITON_BACKEND_REPO_TAG=r21.10
-DTRITON_CORE_REPO_TAG=r21.10
-DTRITON_COMMON_REPO_TAG=r21.10
```
After building the install directory will contain a backends/minimal
directory that contains the *minimal* backend. Instructions for adding
this backend to the Triton server are described in [Backend Shared
Library](../README.md#backend-shared-library).
#### Running Triton with the *Minimal* Backend
After adding the *minimal* backend to the Triton server as described
in [Backend Shared Library](../README.md#backend-shared-library), you
can run Triton and have it load the models in
[model_repos/minimal_models](model_repos/minimal_models). Assuming you
have created a *tritonserver* Docker image by adding the *minimal*
backend to Triton, the following command will run Triton:
```
$ docker run --rm -it --net=host -v/path/to/model_repos/minimal_models:/models tritonserver --model-repository=/models
```
The console output will be similar to the following, indicating that
the *batching* and *nonbatching* models from the minimal_models
repository have loaded correctly. Note that the model repository has
two models that both use the *minimal* backend. A backend can support
any number of different models.
```
I1215 23:46:00.250284 68 server.cc:589]
+-------------+---------+--------+
| Model | Version | Status |
+-------------+---------+--------+
| batching | 1 | READY |
| nonbatching | 1 | READY |
+-------------+---------+--------+
```
The models are identical except that the *batching* model enables the
[dynamic
batcher](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#dynamic-batcher)
and supports batch sizes up to 8. Note that the *batching* model sets
the [batch
delay](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#delayed-batching)
to 5 seconds so that the example client described below can
demonstrate how the *minimal* backend receives a batch of requests.
#### Testing the *Minimal* Backend
The [clients](clients) directory holds example clients. The
[minimal_client](clients/minimal_client) Python script demonstrates
sending a couple of inference requests to the *minimal* backend. With
Triton running as described in [Running Triton with the *Minimal*
Backend](#running-triton-with-the-minimal-backend), execute the
client:
```
$ clients/minimal_client
```
The minimal_client first sends a single request to the nonbatching
model. From the output you can see that the input value is returned in
the output.
```
=========
Sending request to nonbatching model: IN0 = [1 2 3 4]
Response: {'model_name': 'nonbatching', 'model_version': '1', 'outputs': [{'name': 'OUT0', 'datatype': 'INT32', 'shape': [4], 'parameters': {'binary_data_size': 16}}]}
OUT0 = [1 2 3 4]
```
In the Triton console output you can see the log message printed by
the *minimal* backend that indicates that it received a batch
containing the single request.
```
I1221 18:14:12.964836 86 minimal.cc:348] model nonbatching: requests in batch 1
I1221 18:14:12.964857 86 minimal.cc:356] batched IN0 value: [ 1, 2, 3, 4 ]
```
The minimal_client next sends 2 requests at the same time to the
batching model. Triton will dynamically batch those requests into a
single batch and send that single batch to the *minimal* backend.
```
=========
Sending request to batching model: IN0 = [[10 11 12 13]]
Sending request to batching model: IN0 = [[20 21 22 23]]
Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUT0', 'datatype': 'INT32', 'shape': [1, 4], 'parameters': {'binary_data_size': 16}}]}
OUT0 = [[10 11 12 13]]
Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUT0', 'datatype': 'INT32', 'shape': [1, 4], 'parameters': {'binary_data_size': 16}}]}
OUT0 = [[20 21 22 23]]
```
In the Triton console output you can see the log message indicating
that the *minimal* backend received a batch containing both requests.
```
I1221 18:14:17.965982 86 minimal.cc:348] model batching: requests in batch 2
I1221 18:14:17.966035 86 minimal.cc:356] batched IN0 value: [ 10, 11, 12, 13, 20, 21, 22, 23 ]
```
### *Recommended* Triton Backend
The source code for the *recommended* backend is contained in
[recommended.cc](backends/recommended/src/recommended.cc). The source
code contains extensive documentation describing the operation of the
backend and the use of the [Triton Backend
API](../README.md#triton-backend-api) and the backend
utilities. Before reading the source code, make sure you understand
the concepts associated with Triton backend abstractions
[TRITONBACKEND_Backend](../README.md#tritonbackend_backend),
[TRITONBACKEND_Model](../README.md#tritonbackend_model), and
[TRITONBACKEND_ModelInstance](../README.md#tritonbackend_modelinstance).
The *recommended* backend improves the [*minimal*
backend](#minimal-triton-backend) to include the following features
which should be present in any robust backend implementation:
* Enhances the backend to support models with input/output tensors
that have datatypes other than INT32.
* Enhances the backend to support models with input/output tensors
that have any shape.
* Uses the Triton backend metric APIs to record statistics about
requests executing in the backend. These metrics can then be queried
using the Triton
[metrics](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md)
and
[statistics](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md)
APIs.
* Additional error checking to ensure that the backend's version is
compatible with Triton and that each model's configuration is
compatible with the backend.
As with the *minimal* backend, the *recommended* backend just returns
the input tensor value in the output tensor. Because of the additions
described above, the *recommended* backend can serve as a starting
point for your backend.
#### Building the *Recommended* Backend
[backends/recommended/CMakeLists.txt](backends/recommended/CMakeLists.txt)
shows the recommended build and install script for a Triton
backend. Building and installing is the same as described in [Building
the *Minimal* Backend](#building-the-minimal-backend).
#### Running Triton with the *Recommended* Backend
After adding the *recommended* backend to the Triton server as
described in [Backend Shared
Library](../README.md#backend-shared-library), you can run Triton and
have it load the models in
[model_repos/recommended_models](model_repos/recommended_models). Assuming
you have created a *tritonserver* Docker image by adding the
*recommended* backend to Triton, the following command will run
Triton:
```
$ docker run --rm -it --net=host -v/path/to/model_repos/recommended_models:/models tritonserver --model-repository=/models
```
The console output will be similar to the following, indicating that
the *batching* model from the recommended_models repository has
loaded correctly.
```
I1215 23:46:00.250284 68 server.cc:589]
+-------------+---------+--------+
| Model | Version | Status |
+-------------+---------+--------+
| batching | 1 | READY |
+-------------+---------+--------+
```
#### Testing the *Recommended* Backend
The [clients](clients) directory holds example clients. The
[recommended_client](clients/recommended_client) Python script
demonstrates sending a couple of inference requests to the
*recommended* backend. With Triton running as described in [Running
Triton with the *Recommended*
Backend](#running-triton-with-the-recommended-backend), execute the
client:
```
$ clients/recommended_client
```
The recommended_client sends 2 requests at the same time to the
batching model, similar to what was done above with the *minimal*
backend. Triton will dynamically batch those requests into a single
batch and send that single batch to the *recommended* backend. This
model supports batching, uses the FP32 datatype, and has tensor
shape [ -1, 4, 4 ].
```
=========
Sending request to batching model: input = [[[1. 1.1 1.2 1.3]
[2. 2.1 2.2 2.3]
[3. 3.1 3.2 3.3]
[4. 4.1 4.2 4.3]]]
Sending request to batching model: input = [[[10. 10.1 10.2 10.3]
[20. 20.1 20.2 20.3]
[30. 30.1 30.2 30.3]
[40. 40.1 40.2 40.3]]]
Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUTPUT', 'datatype': 'FP32', 'shape': [1, 4, 4], 'parameters': {'binary_data_size': 64}}]}
OUTPUT = [[[1. 1.1 1.2 1.3]
[2. 2.1 2.2 2.3]
[3. 3.1 3.2 3.3]
[4. 4.1 4.2 4.3]]]
Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUTPUT', 'datatype': 'FP32', 'shape': [1, 4, 4], 'parameters': {'binary_data_size': 64}}]}
OUTPUT = [[[10. 10.1 10.2 10.3]
[20. 20.1 20.2 20.3]
[30. 30.1 30.2 30.3]
[40. 40.1 40.2 40.3]]]
```
In the Triton console output you can see the log message indicating
that the *recommended* backend received a batch containing both
requests.
```
I1221 18:30:52.223226 127 recommended.cc:604] model batching: requests in batch 2
I1221 18:30:52.223313 127 recommended.cc:613] batched INPUT value: [ 1.000000, 1.100000, 1.200000, 1.300000, 2.000000, 2.100000, 2.200000, 2.300000, 3.000000, 3.100000, 3.200000, 3.300000, 4.000000, 4.100000, 4.200000, 4.300000, 10.000000, 10.100000, 10.200000, 10.300000, 20.000000, 20.100000, 20.200001, 20.299999, 30.000000, 30.100000, 30.200001, 30.299999, 40.000000, 40.099998, 40.200001, 40.299999 ]
```
Because the *recommended* backend can support models that have
input/output tensors with any datatype and shape, you can edit the
model configuration and the client to experiment with these options.
To see the metrics collected for these two inference requests, use the following command to access Triton's metrics endpoint.
```
$ curl localhost:8002/metrics
```
The output will be metric values in Prometheus data format. The
[metrics
documentation](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md)
gives a description of these metric values.
```
# HELP nv_inference_request_success Number of successful inference requests, all batch sizes
# TYPE nv_inference_request_success counter
nv_inference_request_success{model="batching",version="1"} 2.000000
# HELP nv_inference_request_failure Number of failed inference requests, all batch sizes
# TYPE nv_inference_request_failure counter
nv_inference_request_failure{model="batching",version="1"} 0.000000
# HELP nv_inference_count Number of inferences performed
# TYPE nv_inference_count counter
nv_inference_count{model="batching",version="1"} 2.000000
# HELP nv_inference_exec_count Number of model executions performed
# TYPE nv_inference_exec_count counter
nv_inference_exec_count{model="batching",version="1"} 1.000000
...
```
You can also see the collected statistics using the [statistics
endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md).
```
$ curl localhost:8000/v2/models/batching/stats
{"model_stats":[{"name":"batching","version":"1","last_inference":1640111452223,"inference_count":2,"execution_count":1,"inference_stats":{"success":{"count":2,"ns":9997025869},"fail":{"count":0,"ns":0},"queue":{"count":2,"ns":9996491319},"compute_input":{"count":2,"ns":95288},"compute_infer":{"count":2,"ns":232202},"compute_output":{"count":2,"ns":195850}},"batch_stats":[{"batch_size":2,"compute_input":{"count":1,"ns":47644},"compute_infer":{"count":1,"ns":116101},"compute_output":{"count":1,"ns":97925}}]}]}
```
### *BLS* Triton Backend
Please see the [documentation](backends/bls/README.md) for the *BLS* backend.
### Enhancements
This section describes several optional features that you can add to
enhance the capabilities of your backend.
#### Automatic Model Configuration Generation
[Automatic model configuration
generation](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#auto-generated-model-configuration)
is enabled by the backend implementing the appropriate logic (for
example, in a function called AutoCompleteConfig) during
TRITONBACKEND_ModelInitialize. For the *recommended* backend you would
add a call to AutoCompleteConfig in the ModelState constructor just
before the call to ValidateModelConfig. The AutoCompleteConfig
function can update the model configuration with input tensor, output
tensor, and max-batch-size configuration; and then update the
configuration using TRITONBACKEND_ModelSetConfig. Examples can be
found in [ONNXRuntime
backend](https://github.com/triton-inference-server/onnxruntime_backend),
[TensorFlow
backend](https://github.com/triton-inference-server/tensorflow_backend)
and other backends.
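The following is a minimal sketch, assuming the model configuration is
held in a `common::TritonJson::Value` named `model_config_` (as in the
`BackendModel` utility class); the function name `AutoCompleteConfig`,
the default `max_batch_size` of 8, and the fields filled in are
illustrative assumptions rather than part of any existing backend.
```
// Hypothetical auto-complete helper; the default max_batch_size and the
// fields completed here are assumptions for illustration only.
TRITONSERVER_Error*
ModelState::AutoCompleteConfig()
{
  // Add max_batch_size if the configuration does not already provide it.
  if (!model_config_.Find("max_batch_size")) {
    RETURN_IF_ERROR(model_config_.AddInt("max_batch_size", 8));
  }

  // Input and output tensors could be filled in the same way, typically
  // by inspecting the model artifact.

  // Serialize the updated configuration and hand it back to Triton.
  common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(model_config_.Write(&buffer));

  TRITONSERVER_Message* message;
  RETURN_IF_ERROR(TRITONSERVER_MessageNewFromSerializedJson(
      &message, buffer.Base(), buffer.Size()));
  RETURN_IF_ERROR(TRITONBACKEND_ModelSetConfig(
      TritonModel(), 1 /* config_version */, message));
  RETURN_IF_ERROR(TRITONSERVER_MessageDelete(message));

  return nullptr;  // success
}
```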
#### Add Key-Value Parameters to a Response
A backend can add a key-value pair to a response any time after the
response is created and before it is sent. The parameter key must be a
string and the parameter value can be a string, integer or
boolean. The following example shows the TRITONBACKEND API used to set
response parameters. Error checking code is omitted for clarity.
```
TRITONBACKEND_ResponseSetStringParameter(response, "param0", "an example string parameter");
TRITONBACKEND_ResponseSetIntParameter(response, "param1", 42);
TRITONBACKEND_ResponseSetBoolParameter(response, "param2", false);
```
#### Access Model Artifacts in the Model Repository
A backend can access any of the files in a model's area of the model
repository. These files are typically needed during
TRITONBACKEND_ModelInitialize but can be accessed at other times as
well. The TRITONBACKEND_ModelRepository API gives the location of the
model's repository. For example, the following code can be run during
TRITONBACKEND_ModelInitialize to write the location to the log.
```
// Can get location of the model artifacts. Normally we would need
// to check the artifact type to make sure it was something we can
// handle... but we are just going to log the location so we don't
// need the check. We would use the location if we wanted to load
// something from the model's repo.
TRITONBACKEND_ArtifactType artifact_type;
const char* clocation;
RETURN_IF_ERROR(
TRITONBACKEND_ModelRepository(model, &artifact_type, &clocation));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("Repository location: ") + clocation).c_str());
```
The framework backends (for example, TensorRT, ONNXRuntime,
TensorFlow, PyTorch) read the actual model file from the model
repository using this API. See those backends for examples of how it
can be used.
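As a hedged illustration (not taken from any particular backend), the
repository location can be combined with the model version to build a
path to an artifact. Here `JoinPath` is a helper from the backend
utilities and the file name `model.onnx` is an assumption made only for
this sketch.
```
// Hypothetical continuation of the example above: build a filesystem
// path to a model artifact. The artifact file name is an assumption.
TRITONBACKEND_ArtifactType artifact_type;
const char* clocation;
RETURN_IF_ERROR(
    TRITONBACKEND_ModelRepository(model, &artifact_type, &clocation));

uint64_t version;
RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &version));

// Only handle a local filesystem repository in this sketch.
if (artifact_type == TRITONBACKEND_ARTIFACT_FILESYSTEM) {
  // For a filesystem repository, version N of the model lives in
  // "<location>/<N>/".
  const std::string artifact_path =
      JoinPath({clocation, std::to_string(version), "model.onnx"});
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("Artifact path: ") + artifact_path).c_str());
}
```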
<!--
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# *BLS* Triton Backend
The [*BLS*](../bls) backend demonstrates using the Triton in-process C API to
execute inferences within the backend. This backend serves as an example for
backend developers implementing their own custom pipelines in C++.
For Python use cases, please refer to
[Business Logic Scripting](https://github.com/triton-inference-server/python_backend/blob/main/README.md#business-logic-scripting)
section in Python backend.
The source code for the *bls* backend is contained in
[src](./src).
* [backend.cc](./src/backend.cc) contains the main backend
implementation. The content of this file is not BLS specific. It only includes
the required Triton backend functions that are standard for any backend
implementation. The BLS logic is invoked from
`TRITONBACKEND_ModelInstanceExecute` with the line `bls_executor.Execute(requests[r], &responses[r]);`.
* [bls.h](./src/bls.h) is where the BLS logic (class `BLSExecutor`) of
this example is located. You can refer to this file to see how to use the
Triton in-process C API to build a custom execution pipeline.
* [bls_utils.h](./src/bls_utils.h) is where all the utilities that
are not BLS-specific are located.
The source code contains extensive documentation describing the operation of
the backend and the use of the
[Triton Backend API](../../../README.md#triton-backend-api) and the
[Triton Server API](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#in-process-triton-server-api).
Before reading the source code, make sure you understand
the concepts associated with Triton backend abstractions
[TRITONBACKEND_Backend](../../../README.md#tritonbackend_backend),
[TRITONBACKEND_Model](../../../README.md#tritonbackend_model), and
[TRITONBACKEND_ModelInstance](../../../README.md#tritonbackend_modelinstance).
The *bls* backend sends two requests, one to the 'addsub_python' model and one
to the 'addsub_tf' model. After both inference requests complete, the backend
extracts OUTPUT0 from the 'addsub_python' response and OUTPUT1 from the
'addsub_tf' response to construct the final inference response object from
these tensors.
There are some self-imposed limitations made to keep this example simple:
1. This backend does not support batching.
2. This backend does not support decoupled models.
3. This backend does not support GPU tensors.
4. The model configuration must be set exactly as described in the comments in
[backend.cc](./src/backend.cc).
Your own backend does not need to have these limitations.
## Building the *BLS* Backend
[backends/bls/CMakeLists.txt](CMakeLists.txt)
shows the recommended build and install script for a Triton
backend. Building and installing is the same as described in [Building
the *Minimal* Backend](../../README.md#building-the-minimal-backend).
## Running Triton with the *BLS* Backend
After adding the *bls* backend to the Triton server as
described in [Backend Shared
Library](../../../README.md#backend-shared-library), you can run Triton and
have it load the models in
[model_repos/bls_models](../../model_repos/bls_models). Assuming you have created a
*tritonserver* Docker image by adding the *bls* backend to Triton, the
following command will run Triton:
```
$ docker run --rm -it --net=host -v/path/to/model_repos/bls_models:/models tritonserver --model-repository=/models
```
The console output will be similar to the following, indicating that the
*bls_fp32*, *addsub_python* and *addsub_tf* models from the bls_models
repository have loaded correctly.
```
I0616 09:34:47.767433 19214 server.cc:629]
+---------------+---------+--------+
| Model | Version | Status |
+---------------+---------+--------+
| addsub_python | 1 | READY |
| addsub_tf | 1 | READY |
| bls_fp32 | 1 | READY |
+---------------+---------+--------+
```
## Testing the *BLS* Backend
The [clients](../../clients) directory holds example clients. The
[bls_client](../../clients/bls_client) Python script demonstrates sending an
inference request to the *bls* backend. With Triton running as
described in [Running Triton with the *BLS* Backend](#running-triton-with-the-bls-backend),
execute the client:
```
$ clients/bls_client
```
You should see output similar to the following:
```
INPUT0 ([0.42935285 0.51512766 0.43625894 ... 0.6670954 0.17747518 0.7976901 ]) + INPUT1 ([6.7752063e-01 2.4223252e-01 6.7743927e-01 ... 4.1531715e-01 2.5451833e-01 7.9097062e-01]) = OUTPUT0 ([1.1068735 0.75736016 1.1136982 ... 1.0824126 0.4319935 1.5886607 ])
INPUT0 ([0.42935285 0.51512766 0.43625894 ... 0.6670954 0.17747518 0.7976901 ]) - INPUT1 ([6.7752063e-01 2.4223252e-01 6.7743927e-01 ... 4.1531715e-01 2.5451833e-01 7.9097062e-01]) = OUTPUT1 ([-0.24816778 0.27289516 -0.24118033 ... 0.25177827 -0.07704315 0.00671947])
PASS
```
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONBLSBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONBLSBACKEND_CMAKE_DIR})
if(NOT TARGET TritonBLSBackend::triton-bls-backend)
include("${TRITONBLSBACKEND_CMAKE_DIR}/TritonBLSBackendTargets.cmake")
endif()
set(TRITONBLSBACKEND_LIBRARIES TritonBLSBackend::triton-bls-backend)
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "bls.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"
//
// Backend that demonstrates using in-process C-API to execute inferences
// within the backend.
//
// Two particular models, 'addsub_python' and 'addsub_tf', must be loaded on
// the server for a successful inference execution on this backend.
//
// The model configuration should be set as follows in order to be in line with
// the 'addsub_python' and 'addsub_tf' models. This backend does not support
// batching. These limitations are only for this specific backend. You can
// implement your own BLS backend with fewer limitations.
//
// Model Configuration:
// - Input 'INPUT0' must have shape [16] and datatype must be TYPE_FP32.
//
// - Input 'INPUT1' must have shape [16] and datatype must be TYPE_FP32.
//
// - For each response, output 'OUTPUT0' must have shape [16] and
// datatype TYPE_FP32.
//
// - For each response, output 'OUTPUT1' must have shape [16] and
// datatype TYPE_FP32.
//
// This backend will send two requests on the 'addsub_python' and 'addsub_tf'
// models. After the inference requests are completed, this backend
// will extract OUTPUT0 from the 'addsub_python' and OUTPUT1 from the
// 'addsub_tf' model to construct the final inference response object using
// these tensors.
namespace triton { namespace backend { namespace bls {
//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model.
//
class ModelState : public BackendModel {
public:
static TRITONSERVER_Error* Create(
TRITONBACKEND_Model* triton_model, ModelState** state);
virtual ~ModelState() = default;
// Validate that model configuration is supported by this backend.
TRITONSERVER_Error* ValidateModelConfig();
private:
ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model) {}
};
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
try {
*state = new ModelState(triton_model);
}
catch (const BackendModelException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
// We have the json DOM for the model configuration...
common::TritonJson::WriteBuffer buffer;
RETURN_IF_ERROR(model_config_.PrettyWrite(&buffer));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("model configuration:\n") + buffer.Contents()).c_str());
// max_batch_size must be 0 because this backend does not support
// batching
int64_t max_batch_size;
RETURN_IF_ERROR(model_config_.MemberAsInt("max_batch_size", &max_batch_size));
RETURN_ERROR_IF_FALSE(
max_batch_size == 0, TRITONSERVER_ERROR_INVALID_ARG,
std::string("bls backend only supports models with max_batch_size == 0"));
common::TritonJson::Value inputs, outputs;
RETURN_IF_ERROR(model_config_.MemberAsArray("input", &inputs));
RETURN_IF_ERROR(model_config_.MemberAsArray("output", &outputs));
// There must be 2 inputs and 2 outputs.
RETURN_ERROR_IF_FALSE(
inputs.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected 2 inputs, got ") +
std::to_string(inputs.ArraySize()));
RETURN_ERROR_IF_FALSE(
outputs.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected 2 outputs, got ") +
std::to_string(outputs.ArraySize()));
// Here we rely on the model configuation listing the inputs and
// outputs in a specific order, which we shouldn't really require...
common::TritonJson::Value input0, input1, output0, output1;
RETURN_IF_ERROR(inputs.IndexAsObject(0, &input0));
RETURN_IF_ERROR(inputs.IndexAsObject(1, &input1));
RETURN_IF_ERROR(outputs.IndexAsObject(0, &output0));
RETURN_IF_ERROR(outputs.IndexAsObject(1, &output1));
// Check tensor names
std::string in0_name, in1_name, out0_name, out1_name;
RETURN_IF_ERROR(input0.MemberAsString("name", &in0_name));
RETURN_IF_ERROR(input1.MemberAsString("name", &in1_name));
RETURN_IF_ERROR(output0.MemberAsString("name", &out0_name));
RETURN_IF_ERROR(output1.MemberAsString("name", &out1_name));
RETURN_ERROR_IF_FALSE(
in0_name == "INPUT0", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected first input tensor name to be INPUT0, got ") +
in0_name);
RETURN_ERROR_IF_FALSE(
in1_name == "INPUT1", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected second input tensor name to be INPUT1, got ") +
in1_name);
RETURN_ERROR_IF_FALSE(
out0_name == "OUTPUT0", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected first output tensor name to be OUTPUT0, got ") +
out0_name);
RETURN_ERROR_IF_FALSE(
out1_name == "OUTPUT1", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected second output tensor name to be OUTPUT1, got ") +
out1_name);
// Check shapes
std::vector<int64_t> in0_shape, in1_shape, out0_shape, out1_shape;
RETURN_IF_ERROR(backend::ParseShape(input0, "dims", &in0_shape));
RETURN_IF_ERROR(backend::ParseShape(input1, "dims", &in1_shape));
RETURN_IF_ERROR(backend::ParseShape(output0, "dims", &out0_shape));
RETURN_IF_ERROR(backend::ParseShape(output1, "dims", &out1_shape));
RETURN_ERROR_IF_FALSE(
in0_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT0 shape to have one dimension, got ") +
backend::ShapeToString(in0_shape));
RETURN_ERROR_IF_FALSE(
in1_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT1 shape to have one dimension, got ") +
backend::ShapeToString(in1_shape));
RETURN_ERROR_IF_FALSE(
out0_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT0 shape to have one dimension, got ") +
backend::ShapeToString(out0_shape));
RETURN_ERROR_IF_FALSE(
out1_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT1 shape to have one dimension, got ") +
backend::ShapeToString(out1_shape));
// Check datatypes
std::string in0_dtype, in1_dtype, out0_dtype, out1_dtype;
RETURN_IF_ERROR(input0.MemberAsString("data_type", &in0_dtype));
RETURN_IF_ERROR(input1.MemberAsString("data_type", &in1_dtype));
RETURN_IF_ERROR(output0.MemberAsString("data_type", &out0_dtype));
RETURN_IF_ERROR(output1.MemberAsString("data_type", &out1_dtype));
RETURN_ERROR_IF_FALSE(
in0_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT0 datatype to be TYPE_FP32, got ") +
in0_dtype);
RETURN_ERROR_IF_FALSE(
in1_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT1 datatype to be TYPE_FP32, got ") +
in1_dtype);
RETURN_ERROR_IF_FALSE(
out0_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT0 datatype to be TYPE_FP32, got ") +
out0_dtype);
RETURN_ERROR_IF_FALSE(
out1_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT1 datatype to be TYPE_FP32, got ") +
out1_dtype);
return nullptr; // success
}
//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each TRITONBACKEND_ModelInstance.
//
class ModelInstanceState : public BackendModelInstance {
public:
static TRITONSERVER_Error* Create(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state);
virtual ~ModelInstanceState() = default;
void ProcessRequests(
TRITONBACKEND_Request** requests, const uint32_t request_count);
private:
ModelInstanceState(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance)
: BackendModelInstance(model_state, triton_model_instance)
{
}
};
TRITONSERVER_Error*
ModelInstanceState::Create(
ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state)
{
try {
*state = new ModelInstanceState(model_state, triton_model_instance);
}
catch (const BackendModelInstanceException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelInstanceException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
void
ModelInstanceState::ProcessRequests(
TRITONBACKEND_Request** requests, const uint32_t request_count)
{
uint64_t exec_start_ns = 0;
SET_TIMESTAMP(exec_start_ns);
for (size_t i = 0; i < request_count; i++) {
// If we get a nullptr request then something is badly wrong. Fail
// and release all requests.
if (requests[i] == nullptr) {
RequestsRespondWithError(
requests, request_count,
TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
std::string(
"null request given to BLS backend for '" + Name() + "'")
.c_str()));
return;
}
}
// At this point we accept ownership of 'requests', which means that
// even if something goes wrong we must still return success from
// this function. If something does go wrong in processing a
// particular request then we send an error response just for the
// specific request.
std::vector<TRITONBACKEND_Response*> responses;
responses.reserve(request_count);
for (size_t i = 0; i < request_count; i++) {
TRITONBACKEND_Response* response;
auto err = TRITONBACKEND_ResponseNew(&response, requests[i]);
if (err == nullptr) {
responses.emplace_back(response);
} else {
responses.emplace_back(nullptr);
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, "Failed to create response");
TRITONSERVER_ErrorDelete(err);
}
}
ModelState* model_state = reinterpret_cast<ModelState*>(Model());
// The way we collect these batch timestamps is not entirely
// accurate. Normally, in a performant backend you would execute all
// the requests at the same time, and so there would be a single
// compute-start / compute-end time-range. But here we execute each
// request separately so there is no single range. As a result we
// just show the entire execute time as being the compute time as
// well.
uint64_t compute_start_ns = 0;
SET_TIMESTAMP(compute_start_ns);
// Create a BLSExecutor object. To separate from standard backend
// implementation, the BLS logic is placed inside class BLSExecutor.
BLSExecutor bls_executor(model_state->TritonServer());
for (size_t r = 0; r < request_count; r++) {
bls_executor.Execute(requests[r], &responses[r]);
}
uint64_t compute_end_ns = 0;
SET_TIMESTAMP(compute_end_ns);
uint64_t exec_end_ns = 0;
SET_TIMESTAMP(exec_end_ns);
// Send all the responses that haven't already been sent because of
// an earlier error. Note that the responses are not set to nullptr
// here as we need that indication below to determine if the request
// was successful or not.
for (auto& response : responses) {
if (response != nullptr) {
LOG_IF_ERROR(
TRITONBACKEND_ResponseSend(
response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
"failed to send BLS backend response");
}
}
// Report statistics for each request.
for (uint32_t r = 0; r < request_count; ++r) {
auto& request = requests[r];
LOG_IF_ERROR(
TRITONBACKEND_ModelInstanceReportStatistics(
TritonModelInstance(), request,
(responses[r] != nullptr) /* success */, exec_start_ns,
compute_start_ns, compute_end_ns, exec_end_ns),
"failed reporting request statistics");
LOG_IF_ERROR(
TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
"failed releasing request");
}
// Report the entire batch statistics.
LOG_IF_ERROR(
TRITONBACKEND_ModelInstanceReportBatchStatistics(
TritonModelInstance(), 1 /*total_batch_size*/, exec_start_ns,
compute_start_ns, compute_end_ns, exec_end_ns),
"failed reporting batch request statistics");
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("TRITONBACKEND_ModelExecute: model ") + Name() +
" released " + std::to_string(request_count) + " requests")
.c_str());
}
/////////////
extern "C" {
// Implementing TRITONBACKEND_ModelInitialize is optional. The backend
// should initialize any state that is intended to be shared across
// all instances of the model.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
const char* cname;
RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &cname));
std::string name(cname);
uint64_t version;
RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &version));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("TRITONBACKEND_ModelInitialize: ") + name + " (version " +
std::to_string(version) + ")")
.c_str());
// With each model we create a ModelState object and associate it
// with the TRITONBACKEND_Model.
ModelState* model_state;
RETURN_IF_ERROR(ModelState::Create(model, &model_state));
RETURN_IF_ERROR(
TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state)));
// One of the primary things to do in ModelInitialize is to examine
// the model configuration to ensure that it is something that this
// backend can support. If not, returning an error from this
// function will prevent the model from loading.
RETURN_IF_ERROR(model_state->ValidateModelConfig());
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelFinalize is optional unless state
// is set using TRITONBACKEND_ModelSetState. The backend must free
// this state and perform any other cleanup.
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vstate);
LOG_MESSAGE(
TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");
delete model_state;
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelInstanceInitialize is optional. The
// backend should initialize any state that is required for a model
// instance.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
const char* cname;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceName(instance, &cname));
std::string name(cname);
int32_t device_id;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device_id));
TRITONSERVER_InstanceGroupKind kind;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &kind));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("TRITONBACKEND_ModelInstanceInitialize: ") + name + " (" +
TRITONSERVER_InstanceGroupKindString(kind) + " device " +
std::to_string(device_id) + ")")
.c_str());
// The instance can access the corresponding model as well... here
// we get the model and from that get the model's state.
TRITONBACKEND_Model* model;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model));
void* vmodelstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vmodelstate);
// With each instance we create a ModelInstanceState object and
// associate it with the TRITONBACKEND_ModelInstance.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(
ModelInstanceState::Create(model_state, instance, &instance_state));
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
instance, reinterpret_cast<void*>(instance_state)));
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("TRITONBACKEND_ModelInstanceInitialize: instance "
"initialization successful ") +
name + " (device " + std::to_string(device_id) + ")")
.c_str());
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelInstanceFinalize is optional unless
// state is set using TRITONBACKEND_ModelInstanceSetState. The backend
// must free this state and perform any other cleanup.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
ModelInstanceState* instance_state =
reinterpret_cast<ModelInstanceState*>(vstate);
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
"TRITONBACKEND_ModelInstanceFinalize: delete instance state");
delete instance_state;
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelInstanceExecute is required.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
const uint32_t request_count)
{
// Triton will not call this function simultaneously for the same
// 'instance'. But since this backend could be used by multiple
// instances from multiple models the implementation needs to handle
// multiple calls to this function at the same time (with different
// 'instance' objects). Suggested practice for this is to use only
// function-local and model-instance-specific state (obtained from
// 'instance'), which is what we do here.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
instance, reinterpret_cast<void**>(&instance_state)));
ModelState* model_state =
reinterpret_cast<ModelState*>(instance_state->Model());
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("model ") + model_state->Name() + ", instance " +
instance_state->Name() + ", executing " + std::to_string(request_count) +
" requests")
.c_str());
instance_state->ProcessRequests(requests, request_count);
return nullptr; // success
}
} // extern "C"
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "bls.h"
namespace triton { namespace backend { namespace bls {
BLSExecutor::BLSExecutor(TRITONSERVER_Server* server)
: server_(server), model_executor_(server)
{
}
TRITONSERVER_Error*
BLSExecutor::PrepareInferenceRequest(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest** irequest, const std::string model_name)
{
// Get request_id, correlation_id, and flags from the current request
// for preparing a new inference request that we will send to 'addsub_python'
// or 'addsub_tf' model later.
const char* request_id;
uint64_t correlation_id;
uint32_t flags;
RETURN_IF_ERROR(TRITONBACKEND_RequestId(bls_request, &request_id));
RETURN_IF_ERROR(
TRITONBACKEND_RequestCorrelationId(bls_request, &correlation_id));
RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(bls_request, &flags));
// Create an inference request object. The inference request object
// is where we set the name of the model we want to use for
// inference and the input tensors.
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestNew(
irequest, server_, model_name.c_str(), -1 /* model_version */));
// Set request_id, correlation_id, and flags for the new request.
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetId(*irequest, request_id));
RETURN_IF_ERROR(
TRITONSERVER_InferenceRequestSetCorrelationId(*irequest, correlation_id));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetFlags(*irequest, flags));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetReleaseCallback(
*irequest, InferRequestComplete, nullptr /* request_release_userp */));
return nullptr; // success
}
TRITONSERVER_Error*
BLSExecutor::PrepareInferenceInput(
TRITONBACKEND_Request* bls_request, TRITONSERVER_InferenceRequest* irequest)
{
// Get the properties of the two inputs from the current request.
// Then, add the two input tensors and append the input data to the new
// request.
uint32_t input_count;
RETURN_IF_ERROR(TRITONBACKEND_RequestInputCount(bls_request, &input_count));
TRITONBACKEND_Input* input;
const char* name;
TRITONSERVER_DataType datatype;
const int64_t* shape;
uint32_t dims_count;
size_t data_byte_size;
TRITONSERVER_MemoryType data_memory_type;
int64_t data_memory_id;
const char* data_buffer;
for (size_t count = 0; count < input_count; count++) {
RETURN_IF_ERROR(TRITONBACKEND_RequestInputByIndex(
bls_request, count /* index */, &input));
RETURN_IF_ERROR(TRITONBACKEND_InputProperties(
input, &name, &datatype, &shape, &dims_count, nullptr, nullptr));
RETURN_IF_ERROR(TRITONBACKEND_InputBuffer(
input, 0 /* idx */, reinterpret_cast<const void**>(&data_buffer),
&data_byte_size, &data_memory_type, &data_memory_id));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestAddInput(
irequest, name, datatype, shape, dims_count));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestAppendInputData(
irequest, name, &data_buffer[0], data_byte_size, data_memory_type,
data_memory_id));
}
return nullptr; // success
}
TRITONSERVER_Error*
BLSExecutor::PrepareInferenceOutput(
TRITONBACKEND_Request* bls_request, TRITONSERVER_InferenceRequest* irequest)
{
// Indicate the output tensors to be calculated and returned
// for the inference request.
uint32_t output_count;
RETURN_IF_ERROR(TRITONBACKEND_RequestOutputCount(bls_request, &output_count));
const char* output_name;
for (size_t count = 0; count < output_count; count++) {
RETURN_IF_ERROR(TRITONBACKEND_RequestOutputName(
bls_request, count /* index */, &output_name));
RETURN_IF_ERROR(
TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output_name));
}
return nullptr; // success
}
void
BLSExecutor::Execute(
TRITONBACKEND_Request* bls_request, TRITONBACKEND_Response** response)
{
// The names of the models that we will send internal requests on.
std::vector<std::string> model_names = {"addsub_python", "addsub_tf"};
// Check if both models are valid before executing request.
try {
for (size_t i = 0; i < 2; i++) {
// Check if the model is ready.
bool is_ready = false;
THROW_IF_TRITON_ERROR(TRITONSERVER_ServerModelIsReady(
server_, model_names[i].c_str(), -1 /* model_version */, &is_ready));
if (!is_ready) {
throw BLSBackendException(
(std::string("Failed to execute the inference request. Model '") +
model_names[i].c_str() + "' is not ready.")
.c_str());
}
// For simplicity, decoupled API is not supported in this BLS backend. You
// can implement your own backend that supports decoupled models.
uint32_t txn_flags;
THROW_IF_TRITON_ERROR(TRITONSERVER_ServerModelTransactionProperties(
server_, model_names[i].c_str(), -1 /* model_version */, &txn_flags,
nullptr /* voidp */));
if ((txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0) {
throw BLSBackendException(
std::string("Model '") + model_names[i].c_str() +
"' is using the decoupled. This BLS Backend doesn't support models "
"using the decoupled transaction policy.");
}
}
}
catch (const BLSBackendException& bls_exception) {
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what());
RESPOND_AND_SET_NULL_IF_ERROR(
response,
TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL, "Failed to send inference requests"));
return;
}
// Prepare std::future for each model. Since this BLS backend
// can handle requests in parallel, we will send all the inference
// requests first and then retrieve them later.
std::vector<std::future<TRITONSERVER_InferenceResponse*>> futures(2);
// The inference request object for sending internal requests.
TRITONSERVER_InferenceRequest* irequest = nullptr;
// For each inference request, the backend sends two requests on the
// 'addsub_python' and 'addsub_tf' models.
try {
for (size_t icount = 0; icount < 2; icount++) {
// Initialize the inference request with required information.
THROW_IF_TRITON_ERROR(
PrepareInferenceRequest(bls_request, &irequest, model_names[icount]));
THROW_IF_TRITON_ERROR(PrepareInferenceInput(bls_request, irequest));
THROW_IF_TRITON_ERROR(PrepareInferenceOutput(bls_request, irequest));
// Execute inference request.
THROW_IF_TRITON_ERROR(
model_executor_.AsyncExecute(irequest, &futures[icount]));
}
}
catch (const BLSBackendException& bls_exception) {
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what());
LOG_IF_ERROR(
TRITONSERVER_InferenceRequestDelete(irequest),
"Failed to delete inference request.");
RESPOND_AND_SET_NULL_IF_ERROR(
response,
TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL, "Failed to send inference requests"));
return;
}
// If both internal requests are sent successfully, retrieve the output from
// each request and construct the final response.
ConstructFinalResponse(response, std::move(futures));
}
void
BLSExecutor::ConstructFinalResponse(
TRITONBACKEND_Response** response,
std::vector<std::future<TRITONSERVER_InferenceResponse*>> futures)
{
// Prepare two TRITONSERVER_InferenceResponse* objects for 'addsub_python' and
// 'addsub_tf', respectively.
std::vector<TRITONSERVER_InferenceResponse*> completed_responses = {nullptr,
nullptr};
const char* output_name;
TRITONSERVER_DataType output_datatype;
const int64_t* output_shape;
uint64_t dims_count;
size_t output_byte_size;
TRITONSERVER_MemoryType output_memory_type;
int64_t output_memory_id;
const void* output_base;
void* userp;
for (size_t icount = 0; icount < 2; icount++) {
// Retrieve the corresponding TRITONSERVER_InferenceResponse object from
// 'futures'. The InferResponseComplete function sets the std::promise
// so that this thread will block until the response is returned.
completed_responses[icount] = futures[icount].get();
try {
THROW_IF_TRITON_ERROR(
TRITONSERVER_InferenceResponseError(completed_responses[icount]));
}
catch (const BLSBackendException& bls_exception) {
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what());
if (completed_responses[icount] != nullptr) {
LOG_IF_ERROR(
TRITONSERVER_InferenceResponseDelete(completed_responses[icount]),
"Failed to delete inference response.");
}
return;
}
// Retrieve outputs from 'completed_responses'.
// Extract OUTPUT0 from the 'addsub_python' and OUTPUT1 from the
// 'addsub_tf' model to form the final inference response object.
// Get all the information about the output tensor.
RESPOND_AND_SET_NULL_IF_ERROR(
response,
TRITONSERVER_InferenceResponseOutput(
completed_responses[icount], icount, &output_name, &output_datatype,
&output_shape, &dims_count, &output_base, &output_byte_size,
&output_memory_type, &output_memory_id, &userp));
// Create an output tensor in the final response with
// the information retrieved above.
TRITONBACKEND_Output* output;
RESPOND_AND_SET_NULL_IF_ERROR(
response, TRITONBACKEND_ResponseOutput(
*response, &output, output_name, output_datatype,
output_shape, dims_count));
// Get a buffer that holds the tensor data for the output.
// We request a buffer in CPU memory but we have to handle any returned
// type. If we get back a buffer in GPU memory we just fail the request.
void* output_buffer;
output_memory_type = TRITONSERVER_MEMORY_CPU;
RESPOND_AND_SET_NULL_IF_ERROR(
response, TRITONBACKEND_OutputBuffer(
output, &output_buffer, output_byte_size,
&output_memory_type, &output_memory_id));
if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
RESPOND_AND_SET_NULL_IF_ERROR(
response, TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
"failed to create output buffer in CPU memory"));
}
// Fill the BLS output buffer with output data returned by internal
// requests.
memcpy(output_buffer, output_base, output_byte_size);
LOG_IF_ERROR(
TRITONSERVER_InferenceResponseDelete(completed_responses[icount]),
"Failed to delete inference response.");
}
}
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <future>
#include "bls_utils.h"
#include "triton/backend/backend_common.h"
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend { namespace bls {
//
// BLSExecutor
//
// Includes the custom BLS logic for this backend.
// This class shows how to utilize Triton in-process C-API to build the
// execution pipeline.
//
class BLSExecutor {
public:
BLSExecutor(TRITONSERVER_Server* server);
// Prepares the inference request that will be used internally.
TRITONSERVER_Error* PrepareInferenceRequest(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest** irequest, const std::string model_name);
// Prepares the input for the internal inference request.
TRITONSERVER_Error* PrepareInferenceInput(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest* irequest);
// Prepares the output for the internal inference request.
TRITONSERVER_Error* PrepareInferenceOutput(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest* irequest);
// Performs the whole BLS pipeline.
void Execute(
TRITONBACKEND_Request* bls_request, TRITONBACKEND_Response** response);
// Constructs the final response.
void ConstructFinalResponse(
TRITONBACKEND_Response** response,
std::vector<std::future<TRITONSERVER_InferenceResponse*>> futures);
private:
// The server object that encapsulates all the functionality of the Triton
// server and allows access to the Triton server API.
TRITONSERVER_Server* server_;
// The ModelExecutor object for executing inference request on a model.
ModelExecutor model_executor_;
};
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "bls_utils.h"
namespace triton { namespace backend { namespace bls {
TRITONSERVER_Error*
CPUAllocator(
TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
int64_t preferred_memory_type_id, void* userp, void** buffer,
void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
int64_t* actual_memory_type_id)
{
// For simplicity, this backend example always uses CPU memory regardless of
// the preferred memory type. You can make the actual memory type and id that
// we allocate be the same as preferred memory type. You can also provide a
// customized allocator to support different preferred_memory_type, and reuse
// memory buffer when possible.
*actual_memory_type = TRITONSERVER_MEMORY_CPU;
*actual_memory_type_id = preferred_memory_type_id;
// If 'byte_size' is zero just return 'buffer' == nullptr, we don't
// need to do any other book-keeping.
if (byte_size == 0) {
*buffer = nullptr;
*buffer_userp = nullptr;
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE, ("allocated " + std::to_string(byte_size) +
" bytes for result tensor " + tensor_name)
.c_str());
} else {
void* allocated_ptr = nullptr;
*actual_memory_type = TRITONSERVER_MEMORY_CPU;
allocated_ptr = malloc(byte_size);
// Pass the tensor name with buffer_userp so we can show it when
// releasing the buffer.
if (allocated_ptr != nullptr) {
*buffer = allocated_ptr;
*buffer_userp = new std::string(tensor_name);
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("allocated " + std::to_string(byte_size) + " bytes in " +
TRITONSERVER_MemoryTypeString(*actual_memory_type) +
" for result tensor " + tensor_name)
.c_str());
}
}
return nullptr; // Success
}
TRITONSERVER_Error*
ResponseRelease(
TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
size_t byte_size, TRITONSERVER_MemoryType memory_type,
int64_t memory_type_id)
{
std::string* name = nullptr;
if (buffer_userp != nullptr) {
name = reinterpret_cast<std::string*>(buffer_userp);
} else {
name = new std::string("<unknown>");
}
std::stringstream ss;
ss << buffer;
std::string buffer_str = ss.str();
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("Releasing buffer " + buffer_str + " of size " +
std::to_string(byte_size) + " in " +
TRITONSERVER_MemoryTypeString(memory_type) + " for result '" + *name)
.c_str());
switch (memory_type) {
case TRITONSERVER_MEMORY_CPU:
free(buffer);
break;
default:
LOG_MESSAGE(
TRITONSERVER_LOG_ERROR,
std::string(
"error: unexpected buffer allocated in CUDA managed memory")
.c_str());
break;
}
delete name;
return nullptr; // Success
}
void
InferRequestComplete(
TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
if (request != nullptr) {
LOG_IF_ERROR(
TRITONSERVER_InferenceRequestDelete(request),
"Failed to delete inference request.");
}
}
void
InferResponseComplete(
TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
// The following logic only works for non-decoupled models as for decoupled
// models it may send multiple responses for a request or not send any
// responses for a request. Need to modify this function if the model is using
// decoupled API.
if (response != nullptr) {
// Send 'response' to the future.
std::promise<TRITONSERVER_InferenceResponse*>* p =
reinterpret_cast<std::promise<TRITONSERVER_InferenceResponse*>*>(userp);
p->set_value(response);
delete p;
}
}
ModelExecutor::ModelExecutor(TRITONSERVER_Server* server) : server_(server)
{
// When triton needs a buffer to hold an output tensor, it will ask
// us to provide the buffer. In this way we can have any buffer
// management and sharing strategy that we want. To communicate to
// triton the functions that we want it to call to perform the
// allocations, we create a "response allocator" object. We pass
// this response allocate object to triton when requesting
// inference. We can reuse this response allocator object for any
// number of inference requests.
allocator_ = nullptr;
THROW_IF_TRITON_ERROR(TRITONSERVER_ResponseAllocatorNew(
&allocator_, CPUAllocator, ResponseRelease, nullptr /* start_fn */));
}
TRITONSERVER_Error*
ModelExecutor::AsyncExecute(
TRITONSERVER_InferenceRequest* irequest,
std::future<TRITONSERVER_InferenceResponse*>* future)
{
// Perform inference by calling TRITONSERVER_ServerInferAsync. This
// call is asynchronous and therefore returns immediately. The
// completion of the inference and delivery of the response is done
// by triton by calling the "response complete" callback functions
// (InferResponseComplete in this case).
auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
*future = p->get_future();
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetResponseCallback(
irequest, allocator_, nullptr /* response_allocator_userp */,
InferResponseComplete, reinterpret_cast<void*>(p)));
RETURN_IF_ERROR(
TRITONSERVER_ServerInferAsync(server_, irequest, nullptr /* trace */));
return nullptr; // success
}
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <future>
#include <sstream>
#include "triton/backend/backend_common.h"
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend { namespace bls {
#define THROW_IF_TRITON_ERROR(X) \
do { \
TRITONSERVER_Error* tie_err__ = (X); \
if (tie_err__ != nullptr) { \
throw BLSBackendException(TRITONSERVER_ErrorMessage(tie_err__)); \
} \
} while (false)
//
// BLSBackendException
//
// Exception thrown if error occurs in BLSBackend.
//
struct BLSBackendException : std::exception {
BLSBackendException(const std::string& message) : message_(message) {}
const char* what() const throw() { return message_.c_str(); }
std::string message_;
};
// Performs the allocations of output tensors.
TRITONSERVER_Error* CPUAllocator(
TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
int64_t preferred_memory_type_id, void* userp, void** buffer,
void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
int64_t* actual_memory_type_id);
// Callback functions for server inference.
TRITONSERVER_Error* ResponseRelease(
TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
size_t byte_size, TRITONSERVER_MemoryType memory_type,
int64_t memory_type_id);
void InferRequestComplete(
TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp);
void InferResponseComplete(
TRITONSERVER_InferenceResponse* response, const uint32_t flags,
void* userp);
//
// ModelExecutor
//
// Execute inference request on a model.
//
class ModelExecutor {
public:
ModelExecutor(TRITONSERVER_Server* server);
// Performs async inference request.
TRITONSERVER_Error* AsyncExecute(
TRITONSERVER_InferenceRequest* irequest,
std::future<TRITONSERVER_InferenceResponse*>* future);
private:
// The server object that encapsulates all the functionality of the Triton
// server and allows access to the Triton server API.
TRITONSERVER_Server* server_;
// The allocator object that will be used for allocating output tensors.
TRITONSERVER_ResponseAllocator* allocator_;
};
}}} // namespace triton::backend::bls
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
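# CMake package configuration consumed by find_package(TutorialMinimalBackend).
# It adds this directory to CMAKE_MODULE_PATH, includes the exported targets
# the first time it is loaded, and records the imported target in
# TUTORIALMINIMALBACKEND_LIBRARIES. Typical downstream usage (illustrative
# only; 'my_target' is a placeholder):
#
#   find_package(TutorialMinimalBackend REQUIRED)
#   target_link_libraries(my_target PRIVATE TutorialMinimalBackend::triton-minimal-backend)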
include(CMakeFindDependencyMacro)
get_filename_component(
TUTORIALMINIMALBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TUTORIALMINIMALBACKEND_CMAKE_DIR})
if(NOT TARGET TutorialMinimalBackend::triton-minimal-backend)
include("${TUTORIALMINIMALBACKEND_CMAKE_DIR}/TutorialMinimalBackendTargets.cmake")
endif()
set(TUTORIALMINIMALBACKEND_LIBRARIES TutorialMinimalBackend::triton-minimal-backend)
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_input_collector.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"
#include "triton/backend/backend_output_responder.h"
#include "triton/core/tritonbackend.h"
namespace triton { namespace backend { namespace minimal {
//
// Minimal backend that demonstrates the TRITONBACKEND API. This
// backend works for any model that has 1 input called "IN0" with
// INT32 datatype and shape [ 4 ] and 1 output called "OUT0" with
// INT32 datatype and shape [ 4 ]. The backend supports both batching
// and non-batching models.
//
// For each batch of requests, the backend returns the input tensor
// value in the output tensor.
//
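// For reference, a model that uses this backend could be configured with a
// config.pbtxt along these lines (illustrative only; the model name and
// max_batch_size are arbitrary choices):
//
//   name: "minimal_model"
//   backend: "minimal"
//   max_batch_size: 8
//   input [ { name: "IN0", data_type: TYPE_INT32, dims: [ 4 ] } ]
//   output [ { name: "OUT0", data_type: TYPE_INT32, dims: [ 4 ] } ]
//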
/////////////
//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model. ModelState is derived from the BackendModel
// class provided in the backend utilities, which provides many common
// functions.
//
class ModelState : public BackendModel {
public:
static TRITONSERVER_Error* Create(
TRITONBACKEND_Model* triton_model, ModelState** state);
virtual ~ModelState() = default;
private:
ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model) {}
};
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
try {
*state = new ModelState(triton_model);
}
catch (const BackendModelException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
extern "C" {
// Triton calls TRITONBACKEND_ModelInitialize when a model is loaded
// to allow the backend to create any state associated with the model,
// and to also examine the model configuration to determine if the
// configuration is suitable for the backend. Any errors reported by
// this function will prevent the model from loading.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
// Create a ModelState object and associate it with the
// TRITONBACKEND_Model. If anything goes wrong with initialization
// of the model state then an error is returned and Triton will fail
// to load the model.
ModelState* model_state;
RETURN_IF_ERROR(ModelState::Create(model, &model_state));
RETURN_IF_ERROR(
TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state)));
return nullptr; // success
}
// Triton calls TRITONBACKEND_ModelFinalize when a model is no longer
// needed. The backend should cleanup any state associated with the
// model. This function will not be called until all model instances
// of the model have been finalized.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vstate);
delete model_state;
return nullptr; // success
}
} // extern "C"
/////////////
//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each
// TRITONBACKEND_ModelInstance. ModelInstanceState is derived from the
// BackendModelInstance class provided in the backend utilities, which
// provides many common functions.
//
class ModelInstanceState : public BackendModelInstance {
public:
static TRITONSERVER_Error* Create(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state);
virtual ~ModelInstanceState() = default;
// Get the state of the model that corresponds to this instance.
ModelState* StateForModel() const { return model_state_; }
private:
ModelInstanceState(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance)
: BackendModelInstance(model_state, triton_model_instance),
model_state_(model_state)
{
}
ModelState* model_state_;
};
TRITONSERVER_Error*
ModelInstanceState::Create(
ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state)
{
try {
*state = new ModelInstanceState(model_state, triton_model_instance);
}
catch (const BackendModelInstanceException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelInstanceException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
extern "C" {
// Triton calls TRITONBACKEND_ModelInstanceInitialize when a model
// instance is created to allow the backend to initialize any state
// associated with the instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
// Get the model state associated with this instance's model.
TRITONBACKEND_Model* model;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model));
void* vmodelstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vmodelstate);
// Create a ModelInstanceState object and associate it with the
// TRITONBACKEND_ModelInstance.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(
ModelInstanceState::Create(model_state, instance, &instance_state));
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
instance, reinterpret_cast<void*>(instance_state)));
return nullptr; // success
}
// Triton calls TRITONBACKEND_ModelInstanceFinalize when a model
// instance is no longer needed. The backend should cleanup any state
// associated with the model instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
ModelInstanceState* instance_state =
reinterpret_cast<ModelInstanceState*>(vstate);
delete instance_state;
return nullptr; // success
}
} // extern "C"
/////////////
extern "C" {
// When Triton calls TRITONBACKEND_ModelInstanceExecute it is required
// that a backend create a response for each request in the batch. A
// response may be the output tensors required for that request or may
// be an error that is returned in the response.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
const uint32_t request_count)
{
// Triton will not call this function simultaneously for the same
// 'instance'. But since this backend could be used by multiple
// instances from multiple models the implementation needs to handle
// multiple calls to this function at the same time (with different
// 'instance' objects). Best practice for a high-performance
// implementation is to avoid introducing mutex/lock and instead use
// only function-local and model-instance-specific state.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
instance, reinterpret_cast<void**>(&instance_state)));
ModelState* model_state = instance_state->StateForModel();
// 'responses' is initialized as a parallel array to 'requests',
// with one TRITONBACKEND_Response object for each
// TRITONBACKEND_Request object. If something goes wrong while
// creating these response objects, the backend simply returns an
// error from TRITONBACKEND_ModelInstanceExecute, indicating to
// Triton that this backend did not create or send any responses and
// so it is up to Triton to create and send an appropriate error
// response for each request. RETURN_IF_ERROR is one of several
// useful macros for error handling that can be found in
// backend_common.h.
std::vector<TRITONBACKEND_Response*> responses;
responses.reserve(request_count);
for (uint32_t r = 0; r < request_count; ++r) {
TRITONBACKEND_Request* request = requests[r];
TRITONBACKEND_Response* response;
RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
responses.push_back(response);
}
// At this point, the backend takes ownership of 'requests', which
// means that it is responsible for sending a response for every
// request. From here, even if something goes wrong in processing,
// the backend must return 'nullptr' from this function to indicate
// success. Any errors and failures must be communicated via the
// response objects.
//
// To simplify error handling, the backend utilities manage
// 'responses' in a specific way and it is recommended that backends
// follow this same pattern. When an error is detected in the
// processing of a request, an appropriate error response is sent
// and the corresponding TRITONBACKEND_Response object within
// 'responses' is set to nullptr to indicate that the
// request/response has already been handled and no further processing
// should be performed for that request. Even if all responses fail,
// the backend still allows execution to flow to the end of the
// function. RESPOND_AND_SET_NULL_IF_ERROR, and
// RESPOND_ALL_AND_SET_NULL_IF_ERROR are macros from
// backend_common.h that assist in this management of response
// objects.
// The backend could iterate over the 'requests' and process each
// one separately. But for performance reasons it is usually
// preferred to create batched input tensors that are processed
// simultaneously. This is especially true for devices like GPUs
// that are capable of exploiting the large amount of parallelism
// exposed by larger data sets.
//
// The backend utilities provide a "collector" to facilitate this
// batching process. The 'collector's ProcessTensor function will
// combine a tensor's value from each request in the batch into a
// single contiguous buffer. The buffer can be provided by the
// backend or 'collector' can create and manage it. In this backend,
// there is not a specific buffer into which the batch should be
// created, so use ProcessTensor arguments that cause 'collector' to
// manage it.
BackendInputCollector collector(
requests, request_count, &responses, model_state->TritonMemoryManager(),
false /* pinned_enabled */, nullptr /* stream*/);
// To instruct ProcessTensor to "gather" the entire batch of IN0
// input tensors into a single contiguous buffer in CPU memory, set
// the "allowed input types" to be the CPU ones (see tritonserver.h
// in the triton-inference-server/core repo for allowed memory
// types).
std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>> allowed_input_types =
{{TRITONSERVER_MEMORY_CPU_PINNED, 0}, {TRITONSERVER_MEMORY_CPU, 0}};
const char* input_buffer;
size_t input_buffer_byte_size;
TRITONSERVER_MemoryType input_buffer_memory_type;
int64_t input_buffer_memory_type_id;
RESPOND_ALL_AND_SET_NULL_IF_ERROR(
responses, request_count,
collector.ProcessTensor(
"IN0", nullptr /* existing_buffer */,
0 /* existing_buffer_byte_size */, allowed_input_types, &input_buffer,
&input_buffer_byte_size, &input_buffer_memory_type,
&input_buffer_memory_type_id));
// Finalize the collector. If 'true' is returned, 'input_buffer'
// will not be valid until the backend synchronizes the CUDA
// stream or event that was used when creating the collector. For
// this backend, GPU is not supported and so no CUDA sync should
// be needed; so if 'true' is returned simply log an error.
const bool need_cuda_input_sync = collector.Finalize();
if (need_cuda_input_sync) {
LOG_MESSAGE(
TRITONSERVER_LOG_ERROR,
"'minimal' backend: unexpected CUDA sync required by collector");
}
// 'input_buffer' contains the batched "IN0" tensor. The backend can
// implement whatever logic is necessary to produce "OUT0". This
// backend simply returns the IN0 value in OUT0 so no actual
// computation is needed.
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("model ") + model_state->Name() + ": requests in batch " +
std::to_string(request_count))
.c_str());
std::string tstr;
IGNORE_ERROR(BufferAsTypedString(
tstr, input_buffer, input_buffer_byte_size, TRITONSERVER_TYPE_INT32));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("batched IN0 value: ") + tstr).c_str());
const char* output_buffer = input_buffer;
TRITONSERVER_MemoryType output_buffer_memory_type = input_buffer_memory_type;
int64_t output_buffer_memory_type_id = input_buffer_memory_type_id;
// This backend supports models that batch along the first dimension
// and those that don't batch. For non-batch models the output shape
// will be [ 4 ]. For batch models the output shape will be [ -1, 4
// ] and the backend "responder" utility below will set the
// appropriate batch dimension value for each response.
std::vector<int64_t> output_batch_shape;
bool supports_first_dim_batching;
RESPOND_ALL_AND_SET_NULL_IF_ERROR(
responses, request_count,
model_state->SupportsFirstDimBatching(&supports_first_dim_batching));
if (supports_first_dim_batching) {
output_batch_shape.push_back(-1);
}
output_batch_shape.push_back(4);
// Because the OUT0 values are concatenated into a single contiguous
// 'output_buffer', the backend must "scatter" them out to the
// individual response OUT0 tensors. The backend utilities provide
// a "responder" to facilitate this scattering process.
// The 'responder's ProcessTensor function will copy the portion of
// 'output_buffer' corresponding to each request's output into the
// response for that request.
BackendOutputResponder responder(
requests, request_count, &responses, model_state->TritonMemoryManager(),
supports_first_dim_batching, false /* pinned_enabled */,
nullptr /* stream*/);
responder.ProcessTensor(
"OUT0", TRITONSERVER_TYPE_INT32, output_batch_shape, output_buffer,
output_buffer_memory_type, output_buffer_memory_type_id);
// Finalize the responder. If 'true' is returned, the OUT0
// tensors' data will not be valid until the backend synchronizes
// the CUDA stream or event that was used when creating the
// responder. For this backend, GPU is not supported and so no
// CUDA sync should be needed; so if 'true' is returned simply log
// an error.
const bool need_cuda_output_sync = responder.Finalize();
if (need_cuda_output_sync) {
LOG_MESSAGE(
TRITONSERVER_LOG_ERROR,
"'minimal' backend: unexpected CUDA sync required by responder");
}
// Send all the responses that haven't already been sent because of
// an earlier error.
for (auto& response : responses) {
if (response != nullptr) {
LOG_IF_ERROR(
TRITONBACKEND_ResponseSend(
response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
"failed to send response");
}
}
// Done with the request objects so release them.
for (uint32_t r = 0; r < request_count; ++r) {
auto& request = requests[r];
LOG_IF_ERROR(
TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
"failed releasing request");
}
return nullptr; // success
}
} // extern "C"
}}} // namespace triton::backend::minimal
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR})
if(NOT TARGET TutorialRecommendedBackend::triton-recommended-backend)
include("${TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR}/TutorialRecommendedBackendTargets.cmake")
endif()
set(TUTORIALRECOMMENDEDBACKEND_LIBRARIES TutorialRecommendedBackend::triton-recommended-backend)