Commit b30f3cdb authored by xiabo

Add the downloaded code

parent e38ee081
---
BasedOnStyle: Google
IndentWidth: 2
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterStruct: false
AfterUnion: false
BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONBACKEND_CMAKE_DIR})
if(NOT TARGET TritonBackend::triton-backend-utils)
include("${TRITONBACKEND_CMAKE_DIR}/TritonBackendTargets.cmake")
endif()
set(TRITONBACKEND_LIBRARIES TritonBackend::triton-backend-utils)
<!--
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# Backend-Platform Support Matrix
Triton supports inference across various platforms such as cloud,
data center, edge, and embedded devices on NVIDIA GPUs, x86 and ARM
CPUs, or AWS Inferentia, but it does so by relying on its backends,
and not every backend supports every platform. This document
describes which compute platforms are supported by each Triton
backend.
GPU in this document refers to an NVIDIA GPU. See
[GPU, Driver, and CUDA Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
to learn more about supported GPUs.
## Ubuntu 20.04
The table below describes target device(s) supported for inference by
each backend on different platforms.
| Backend | x86 | ARM-SBSA |
| ------------ | --------- | ------------- |
| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU | :heavy_check_mark: GPU <br/> :x: CPU |
| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| TensorFlow | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| PyTorch | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| OpenVINO | :x: GPU <br/> :heavy_check_mark: CPU | :x: GPU <br/> :x: CPU |
| Python[^1] | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| DALI | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU[^2] <br/> :heavy_check_mark: CPU[^2] |
| FIL | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | Unsupported |
## Windows 10
Only TensorRT and ONNX Runtime backends are supported on Windows.
| Backend | x86 | ARM-SBSA |
| ------------ | --------- | ------------- |
| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU | :heavy_check_mark: GPU <br/> :x: CPU |
| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
## Jetson JetPack
The following backends are currently supported on Jetson JetPack:
| Backend | Jetson |
| ------------ | --------- |
| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU |
| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| TensorFlow | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| PyTorch | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
| Python[^1] | :x: GPU <br/> :heavy_check_mark: CPU |
See [Triton Inference Server Support for Jetson and JetPack](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/jetson.md) for more details.
## AWS Inferentia
Currently, inference on AWS Inferentia is only supported via the
[python backend](https://github.com/triton-inference-server/python_backend#running-with-inferentia),
where the deployed Python script invokes the AWS Neuron SDK.
[^1]: The supported devices for the Python backend are listed with
respect to Triton. The Python script running in the Python backend
can execute inference on any hardware for which Python APIs are
available; AWS Inferentia is one such example. The Triton core is
largely unaware that inference will run on Inferentia.
[^2]: On ARM-SBSA, some operations are not fully supported.
<!--
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# *BLS* Triton Backend
The [*BLS*](../bls) backend demonstrates using the Triton in-process C API to
execute inferences from within a backend. It serves as an example for
backend developers implementing their own custom pipelines in C++.
For Python use cases, please refer to the
[Business Logic Scripting](https://github.com/triton-inference-server/python_backend/blob/main/README.md#business-logic-scripting)
section of the Python backend.
The source code for the *bls* backend is contained in
[src](./src).
* [backend.cc](./src/backend.cc) contains the main backend
implementation. The content of this file is not BLS specific; it only includes
the Triton backend functions that are standard for any backend
implementation. The BLS logic is kicked off in
`TRITONBACKEND_ModelInstanceExecute` by the line `bls_executor.Execute(requests[r], &responses[r]);`.
* [bls.h](./src/bls.h) is where the BLS logic (class `BLSExecutor`) of
this example is located. You can refer to this file to see how to interact with
the Triton in-process C API to build the custom execution pipeline; a condensed
sketch of this flow follows this list.
* [bls_utils.h](./src/bls_utils.h) is where all the utilities that
do not depend on BLS are located.
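For orientation, the sketch below condenses the request flow that
`BLSExecutor` implements with the in-process C API. It is only a sketch:
variable declarations, error handling, the release callback, and the
response-allocator setup are omitted, and values such as `datatype`, `shape`,
`dims_count`, and `data_buffer` come from the original request's input
properties. See [bls.cc](./src/bls.cc) for the complete implementation.
```
// Create an internal request against one of the composing models
// ("addsub_python" or "addsub_tf"); -1 selects the latest model version.
TRITONSERVER_InferenceRequest* irequest = nullptr;
TRITONSERVER_InferenceRequestNew(&irequest, server_, "addsub_python", -1);

// Copy an input of the original BLS request onto the internal request.
TRITONSERVER_InferenceRequestAddInput(
    irequest, "INPUT0", datatype, shape, dims_count);
TRITONSERVER_InferenceRequestAppendInputData(
    irequest, "INPUT0", data_buffer, data_byte_size, data_memory_type,
    data_memory_id);

// Request the output that will be copied into the final BLS response.
TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, "OUTPUT0");

// Send the request asynchronously. InferResponseComplete fulfills a
// std::promise so the backend can later wait on the matching std::future.
TRITONSERVER_InferenceRequestSetResponseCallback(
    irequest, allocator_, nullptr /* response_allocator_userp */,
    InferResponseComplete, reinterpret_cast<void*>(promise));
TRITONSERVER_ServerInferAsync(server_, irequest, nullptr /* trace */);
```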
The source code contains extensive documentation describing the operation of
the backend and the use of the
[Triton Backend API](../../../README.md#triton-backend-api) and the
[Triton Server API](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#in-process-triton-server-api).
Before reading the source code, make sure you understand
the concepts associated with Triton backend abstractions
[TRITONBACKEND_Backend](../../../README.md#tritonbackend_backend),
[TRITONBACKEND_Model](../../../README.md#tritonbackend_model), and
[TRITONBACKEND_ModelInstance](../../../README.md#tritonbackend_modelinstance).
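To make these abstractions concrete, the sketch below shows the
state-management pattern that [backend.cc](./src/backend.cc) follows:
the backend's own `ModelState` class (defined in backend.cc, not part of the
Triton API) is attached to the `TRITONBACKEND_Model`, and each
`TRITONBACKEND_ModelInstance` can navigate back to it.
```
// During TRITONBACKEND_ModelInitialize: create the backend's state object
// and attach it to the TRITONBACKEND_Model handle.
ModelState* model_state;
RETURN_IF_ERROR(ModelState::Create(model, &model_state));
RETURN_IF_ERROR(
    TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state)));

// During TRITONBACKEND_ModelInstanceInitialize: navigate from the instance
// back to its model and retrieve the state attached above.
TRITONBACKEND_Model* parent_model;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &parent_model));
void* vmodelstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(parent_model, &vmodelstate));
ModelState* parent_state = reinterpret_cast<ModelState*>(vmodelstate);
```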
The *bls* backend sends two requests, one to the 'addsub_python' model and
one to the 'addsub_tf' model. After the inference requests complete, this
backend extracts OUTPUT0 from 'addsub_python' and OUTPUT1 from 'addsub_tf' to
construct the final inference response object from these tensors, as sketched
after the list below.
There are some self-imposed limitations that were made for the simplicity of
this example:
1. This backend does not support batching.
2. This backend does not support decoupled models.
3. This backend does not support GPU tensors.
4. The model configuration must be set strictly as described in the comments in
[backend.cc](./src/backend.cc).
You can implement your own custom backend without these limitations.
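The sketch below condenses how `ConstructFinalResponse` in
[bls.cc](./src/bls.cc) combines the two internal responses into the final
response. Error handling and the variable declarations are omitted; `futures`
holds the two `std::future<TRITONSERVER_InferenceResponse*>` objects returned
by the asynchronous executions.
```
for (size_t icount = 0; icount < 2; icount++) {
  // Block until the internal response is ready ('addsub_python' first,
  // then 'addsub_tf').
  TRITONSERVER_InferenceResponse* completed = futures[icount].get();

  // Read output number 'icount': OUTPUT0 from 'addsub_python',
  // OUTPUT1 from 'addsub_tf'.
  TRITONSERVER_InferenceResponseOutput(
      completed, icount, &output_name, &output_datatype, &output_shape,
      &dims_count, &output_base, &output_byte_size, &output_memory_type,
      &output_memory_id, &userp);

  // Create the corresponding output on the BLS response and copy the data.
  TRITONBACKEND_Output* output;
  TRITONBACKEND_ResponseOutput(
      *response, &output, output_name, output_datatype, output_shape,
      dims_count);
  void* output_buffer;
  TRITONBACKEND_OutputBuffer(
      output, &output_buffer, output_byte_size, &output_memory_type,
      &output_memory_id);
  memcpy(output_buffer, output_base, output_byte_size);
}
```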
## Building the *BLS* Backend
[backends/bls/CMakeLists.txt](CMakeLists.txt)
shows the recommended build and install script for a Triton
backend. Building and installing is the same as described in [Building
the *Minimal* Backend](../../README.md#building-the-minimal-backend).
## Running Triton with the *BLS* Backend
After adding the *bls* backend to the Triton server as
described in [Backend Shared
Library](../../../README.md#backend-shared-library), you can run Triton and
have it load the models in
[model_repos/bls_models](../../model_repos/bls_models). Assuming you have created a
*tritonserver* Docker image by adding the *bls* backend to Triton, the
following command will run Triton:
```
$ docker run --rm -it --net=host -v/path/to/model_repos/bls_models:/models tritonserver --model-repository=/models
```
The console output should be similar to the following, indicating that
the *bls_fp32*, *addsub_python*, and *addsub_tf* models from the bls_models repository have
loaded correctly.
```
I0616 09:34:47.767433 19214 server.cc:629]
+---------------+---------+--------+
| Model | Version | Status |
+---------------+---------+--------+
| addsub_python | 1 | READY |
| addsub_tf | 1 | READY |
| bls_fp32 | 1 | READY |
+---------------+---------+--------+
```
## Testing the *BLS* Backend
The [clients](../../clients) directory holds example clients. The
[bls_client](../../clients/bls_client) Python script demonstrates sending an
inference request to the *bls* backend. With Triton running as
described in [Running Triton with the *BLS* Backend](#running-triton-with-the-bls-backend),
execute the client:
```
$ clients/bls_client
```
You should see output similar to the following:
```
INPUT0 ([0.42935285 0.51512766 0.43625894 ... 0.6670954 0.17747518 0.7976901 ]) + INPUT1 ([6.7752063e-01 2.4223252e-01 6.7743927e-01 ... 4.1531715e-01 2.5451833e-01 7.9097062e-01]) = OUTPUT0 ([1.1068735 0.75736016 1.1136982 ... 1.0824126 0.4319935 1.5886607 ])
INPUT0 ([0.42935285 0.51512766 0.43625894 ... 0.6670954 0.17747518 0.7976901 ]) - INPUT1 ([6.7752063e-01 2.4223252e-01 6.7743927e-01 ... 4.1531715e-01 2.5451833e-01 7.9097062e-01]) = OUTPUT1 ([-0.24816778 0.27289516 -0.24118033 ... 0.25177827 -0.07704315 0.00671947])
PASS
```
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONBLSBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONBLSBACKEND_CMAKE_DIR})
if(NOT TARGET TritonBLSBackend::triton-bls-backend)
include("${TRITONBLSBACKEND_CMAKE_DIR}/TritonBLSBackendTargets.cmake")
endif()
set(TRITONBLSBACKEND_LIBRARIES TritonBLSBackend::triton-bls-backend)
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "bls.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"
//
// Backend that demonstrates using in-process C-API to execute inferences
// within the backend.
//
// Two particular models, 'addsub_python' and 'addsub_tf', must be loaded on
// the server for a successful inference execution on this backend.
//
// The model configuration should be set as follows in order to be in line with
// the 'addsub_python' and 'addsub_tf' models. This backend does not support
// batching. These limitations are only for this specific backend. You can
// implement your own custom BLS backend with fewer limitations.
//
// Model Configuration:
// - Input 'INPUT0' must have shape [16] and datatype must be TYPE_FP32.
//
// - Input 'INPUT1' must have shape [16] and datatype must be TYPE_FP32.
//
// - For each response, output 'OUTPUT0' must have shape [16] and
// datatype TYPE_FP32.
//
// - For each response, output 'OUTPUT1' must have shape [16] and
// datatype TYPE_FP32.
//
// This backend will send two requests on the 'addsub_python' and 'addsub_tf'
// models. After the inference requests are completed, this backend
// will extract OUTPUT0 from the 'addsub_python' and OUTPUT1 from the
// 'addsub_tf' model to construct the final inference response object using
// these tensors.
namespace triton { namespace backend { namespace bls {
//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model.
//
class ModelState : public BackendModel {
public:
static TRITONSERVER_Error* Create(
TRITONBACKEND_Model* triton_model, ModelState** state);
virtual ~ModelState() = default;
// Validate that model configuration is supported by this backend.
TRITONSERVER_Error* ValidateModelConfig();
private:
ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model) {}
};
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
try {
*state = new ModelState(triton_model);
}
catch (const BackendModelException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
// We have the json DOM for the model configuration...
common::TritonJson::WriteBuffer buffer;
RETURN_IF_ERROR(model_config_.PrettyWrite(&buffer));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("model configuration:\n") + buffer.Contents()).c_str());
// max_batch_size must be 0 because this backend does not support
// batching
int64_t max_batch_size;
RETURN_IF_ERROR(model_config_.MemberAsInt("max_batch_size", &max_batch_size));
RETURN_ERROR_IF_FALSE(
max_batch_size == 0, TRITONSERVER_ERROR_INVALID_ARG,
std::string("bls backend only supports models with max_batch_size == 0"));
common::TritonJson::Value inputs, outputs;
RETURN_IF_ERROR(model_config_.MemberAsArray("input", &inputs));
RETURN_IF_ERROR(model_config_.MemberAsArray("output", &outputs));
// There must be 2 inputs and 2 outputs.
RETURN_ERROR_IF_FALSE(
inputs.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected 2 inputs, got ") +
std::to_string(inputs.ArraySize()));
RETURN_ERROR_IF_FALSE(
outputs.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected 2 outputs, got ") +
std::to_string(outputs.ArraySize()));
// Here we rely on the model configuration listing the inputs and
// outputs in a specific order, which we shouldn't really require...
common::TritonJson::Value input0, input1, output0, output1;
RETURN_IF_ERROR(inputs.IndexAsObject(0, &input0));
RETURN_IF_ERROR(inputs.IndexAsObject(1, &input1));
RETURN_IF_ERROR(outputs.IndexAsObject(0, &output0));
RETURN_IF_ERROR(outputs.IndexAsObject(1, &output1));
// Check tensor names
std::string in0_name, in1_name, out0_name, out1_name;
RETURN_IF_ERROR(input0.MemberAsString("name", &in0_name));
RETURN_IF_ERROR(input1.MemberAsString("name", &in1_name));
RETURN_IF_ERROR(output0.MemberAsString("name", &out0_name));
RETURN_IF_ERROR(output1.MemberAsString("name", &out1_name));
RETURN_ERROR_IF_FALSE(
in0_name == "INPUT0", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected first input tensor name to be INPUT0, got ") +
in0_name);
RETURN_ERROR_IF_FALSE(
in1_name == "INPUT1", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected second input tensor name to be INPUT1, got ") +
in1_name);
RETURN_ERROR_IF_FALSE(
out0_name == "OUTPUT0", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected first output tensor name to be OUTPUT0, got ") +
out0_name);
RETURN_ERROR_IF_FALSE(
out1_name == "OUTPUT1", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected second output tensor name to be OUTPUT1, got ") +
out1_name);
// Check shapes
std::vector<int64_t> in0_shape, in1_shape, out0_shape, out1_shape;
RETURN_IF_ERROR(backend::ParseShape(input0, "dims", &in0_shape));
RETURN_IF_ERROR(backend::ParseShape(input1, "dims", &in1_shape));
RETURN_IF_ERROR(backend::ParseShape(output0, "dims", &out0_shape));
RETURN_IF_ERROR(backend::ParseShape(output1, "dims", &out1_shape));
RETURN_ERROR_IF_FALSE(
in0_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT0 shape to have one dimension, got ") +
backend::ShapeToString(in0_shape));
RETURN_ERROR_IF_FALSE(
in1_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT1 shape to have one dimension, got ") +
backend::ShapeToString(in1_shape));
RETURN_ERROR_IF_FALSE(
out0_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT0 shape to have one dimension, got ") +
backend::ShapeToString(out0_shape));
RETURN_ERROR_IF_FALSE(
out1_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT1 shape to have one dimension, got ") +
backend::ShapeToString(out1_shape));
// Check datatypes
std::string in0_dtype, in1_dtype, out0_dtype, out1_dtype;
RETURN_IF_ERROR(input0.MemberAsString("data_type", &in0_dtype));
RETURN_IF_ERROR(input1.MemberAsString("data_type", &in1_dtype));
RETURN_IF_ERROR(output0.MemberAsString("data_type", &out0_dtype));
RETURN_IF_ERROR(output1.MemberAsString("data_type", &out1_dtype));
RETURN_ERROR_IF_FALSE(
in0_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT0 datatype to be TYPE_FP32, got ") +
in0_dtype);
RETURN_ERROR_IF_FALSE(
in1_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected INPUT1 datatype to be TYPE_FP32, got ") +
in1_dtype);
RETURN_ERROR_IF_FALSE(
out0_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT0 datatype to be TYPE_FP32, got ") +
out0_dtype);
RETURN_ERROR_IF_FALSE(
out1_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG,
std::string("expected OUTPUT1 datatype to be TYPE_FP32, got ") +
out1_dtype);
return nullptr; // success
}
//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each TRITONBACKEND_ModelInstance.
//
class ModelInstanceState : public BackendModelInstance {
public:
static TRITONSERVER_Error* Create(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state);
virtual ~ModelInstanceState() = default;
void ProcessRequests(
TRITONBACKEND_Request** requests, const uint32_t request_count);
private:
ModelInstanceState(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance)
: BackendModelInstance(model_state, triton_model_instance)
{
}
};
TRITONSERVER_Error*
ModelInstanceState::Create(
ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state)
{
try {
*state = new ModelInstanceState(model_state, triton_model_instance);
}
catch (const BackendModelInstanceException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelInstanceException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
void
ModelInstanceState::ProcessRequests(
TRITONBACKEND_Request** requests, const uint32_t request_count)
{
uint64_t exec_start_ns = 0;
SET_TIMESTAMP(exec_start_ns);
for (size_t i = 0; i < request_count; i++) {
// If we get a nullptr request then something is badly wrong. Fail
// and release all requests.
if (requests[i] == nullptr) {
RequestsRespondWithError(
requests, request_count,
TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
std::string(
"null request given to BLS backend for '" + Name() + "'")
.c_str()));
return;
}
}
// At this point we accept ownership of 'requests', which means that
// even if something goes wrong we must still return success from
// this function. If something does go wrong in processing a
// particular request then we send an error response just for the
// specific request.
std::vector<TRITONBACKEND_Response*> responses;
responses.reserve(request_count);
for (size_t i = 0; i < request_count; i++) {
TRITONBACKEND_Response* response;
auto err = TRITONBACKEND_ResponseNew(&response, requests[i]);
if (err == nullptr) {
responses.emplace_back(response);
} else {
responses.emplace_back(nullptr);
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, "Failed to create response");
TRITONSERVER_ErrorDelete(err);
}
}
ModelState* model_state = reinterpret_cast<ModelState*>(Model());
// The way we collect these batch timestamps is not entirely
// accurate. Normally, in a performant backend you would execute all
// the requests at the same time, and so there would be a single
// compute-start / compute-end time-range. But here we execute each
// request separately so there is no single range. As a result we
// just show the entire execute time as being the compute time as
// well.
uint64_t compute_start_ns = 0;
SET_TIMESTAMP(compute_start_ns);
// Create a BLSExecutor object. To separate from standard backend
// implementation, the BLS logic is placed inside class BLSExecutor.
BLSExecutor bls_executor(model_state->TritonServer());
for (size_t r = 0; r < request_count; r++) {
bls_executor.Execute(requests[r], &responses[r]);
}
uint64_t compute_end_ns = 0;
SET_TIMESTAMP(compute_end_ns);
uint64_t exec_end_ns = 0;
SET_TIMESTAMP(exec_end_ns);
// Send all the responses that haven't already been sent because of
// an earlier error. Note that the responses are not set to nullptr
// here as we need that indication below to determine if the request
// was successful or not.
for (auto& response : responses) {
if (response != nullptr) {
LOG_IF_ERROR(
TRITONBACKEND_ResponseSend(
response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
"failed to send BLS backend response");
}
}
// Report statistics for each request.
for (uint32_t r = 0; r < request_count; ++r) {
auto& request = requests[r];
LOG_IF_ERROR(
TRITONBACKEND_ModelInstanceReportStatistics(
TritonModelInstance(), request,
(responses[r] != nullptr) /* success */, exec_start_ns,
compute_start_ns, compute_end_ns, exec_end_ns),
"failed reporting request statistics");
LOG_IF_ERROR(
TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
"failed releasing request");
}
// Report the entire batch statistics.
LOG_IF_ERROR(
TRITONBACKEND_ModelInstanceReportBatchStatistics(
TritonModelInstance(), 1 /*total_batch_size*/, exec_start_ns,
compute_start_ns, compute_end_ns, exec_end_ns),
"failed reporting batch request statistics");
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("TRITONBACKEND_ModelExecute: model ") + Name() +
" released " + std::to_string(request_count) + " requests")
.c_str());
}
/////////////
extern "C" {
// Implementing TRITONBACKEND_ModelInitialize is optional. The backend
// should initialize any state that is intended to be shared across
// all instances of the model.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
const char* cname;
RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &cname));
std::string name(cname);
uint64_t version;
RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &version));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("TRITONBACKEND_ModelInitialize: ") + name + " (version " +
std::to_string(version) + ")")
.c_str());
// With each model we create a ModelState object and associate it
// with the TRITONBACKEND_Model.
ModelState* model_state;
RETURN_IF_ERROR(ModelState::Create(model, &model_state));
RETURN_IF_ERROR(
TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state)));
// One of the primary things to do in ModelInitialize is to examine
// the model configuration to ensure that it is something that this
// backend can support. If not, returning an error from this
// function will prevent the model from loading.
RETURN_IF_ERROR(model_state->ValidateModelConfig());
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelFinalize is optional unless state
// is set using TRITONBACKEND_ModelSetState. The backend must free
// this state and perform any other cleanup.
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vstate);
LOG_MESSAGE(
TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");
delete model_state;
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelInstanceInitialize is optional. The
// backend should initialize any state that is required for a model
// instance.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
const char* cname;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceName(instance, &cname));
std::string name(cname);
int32_t device_id;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device_id));
TRITONSERVER_InstanceGroupKind kind;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &kind));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("TRITONBACKEND_ModelInstanceInitialize: ") + name + " (" +
TRITONSERVER_InstanceGroupKindString(kind) + " device " +
std::to_string(device_id) + ")")
.c_str());
// The instance can access the corresponding model as well... here
// we get the model and from that get the model's state.
TRITONBACKEND_Model* model;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model));
void* vmodelstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vmodelstate);
// With each instance we create a ModelInstanceState object and
// associate it with the TRITONBACKEND_ModelInstance.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(
ModelInstanceState::Create(model_state, instance, &instance_state));
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
instance, reinterpret_cast<void*>(instance_state)));
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("TRITONBACKEND_ModelInstanceInitialize: instance "
"initialization successful ") +
name + " (device " + std::to_string(device_id) + ")")
.c_str());
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelInstanceFinalize is optional unless
// state is set using TRITONBACKEND_ModelInstanceSetState. The backend
// must free this state and perform any other cleanup.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
ModelInstanceState* instance_state =
reinterpret_cast<ModelInstanceState*>(vstate);
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
"TRITONBACKEND_ModelInstanceFinalize: delete instance state");
delete instance_state;
return nullptr; // success
}
// Implementing TRITONBACKEND_ModelInstanceExecute is required.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
const uint32_t request_count)
{
// Triton will not call this function simultaneously for the same
// 'instance'. But since this backend could be used by multiple
// instances from multiple models the implementation needs to handle
// multiple calls to this function at the same time (with different
// 'instance' objects). Suggested practice for this is to use only
// function-local and model-instance-specific state (obtained from
// 'instance'), which is what we do here.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
instance, reinterpret_cast<void**>(&instance_state)));
ModelState* model_state =
reinterpret_cast<ModelState*>(instance_state->Model());
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("model ") + model_state->Name() + ", instance " +
instance_state->Name() + ", executing " + std::to_string(request_count) +
" requests")
.c_str());
instance_state->ProcessRequests(requests, request_count);
return nullptr; // success
}
} // extern "C"
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "bls.h"
namespace triton { namespace backend { namespace bls {
BLSExecutor::BLSExecutor(TRITONSERVER_Server* server)
: server_(server), model_executor_(server)
{
}
TRITONSERVER_Error*
BLSExecutor::PrepareInferenceRequest(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest** irequest, const std::string model_name)
{
// Get request_id, correlation_id, and flags from the current request
// for preparing a new inference request that we will send to 'addsub_python'
// or 'addsub_tf' model later.
const char* request_id;
uint64_t correlation_id;
uint32_t flags;
RETURN_IF_ERROR(TRITONBACKEND_RequestId(bls_request, &request_id));
RETURN_IF_ERROR(
TRITONBACKEND_RequestCorrelationId(bls_request, &correlation_id));
RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(bls_request, &flags));
// Create an inference request object. The inference request object
// is where we set the name of the model we want to use for
// inference and the input tensors.
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestNew(
irequest, server_, model_name.c_str(), -1 /* model_version */));
// Set request_id, correlation_id, and flags for the new request.
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetId(*irequest, request_id));
RETURN_IF_ERROR(
TRITONSERVER_InferenceRequestSetCorrelationId(*irequest, correlation_id));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetFlags(*irequest, flags));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetReleaseCallback(
*irequest, InferRequestComplete, nullptr /* request_release_userp */));
return nullptr; // success
}
TRITONSERVER_Error*
BLSExecutor::PrepareInferenceInput(
TRITONBACKEND_Request* bls_request, TRITONSERVER_InferenceRequest* irequest)
{
// Get the properties of the two inputs from the current request.
// Then, add the two input tensors and append the input data to the new
// request.
uint32_t input_count;
RETURN_IF_ERROR(TRITONBACKEND_RequestInputCount(bls_request, &input_count));
TRITONBACKEND_Input* input;
const char* name;
TRITONSERVER_DataType datatype;
const int64_t* shape;
uint32_t dims_count;
size_t data_byte_size;
TRITONSERVER_MemoryType data_memory_type;
int64_t data_memory_id;
const char* data_buffer;
for (size_t count = 0; count < input_count; count++) {
RETURN_IF_ERROR(TRITONBACKEND_RequestInputByIndex(
bls_request, count /* index */, &input));
RETURN_IF_ERROR(TRITONBACKEND_InputProperties(
input, &name, &datatype, &shape, &dims_count, nullptr, nullptr));
RETURN_IF_ERROR(TRITONBACKEND_InputBuffer(
input, 0 /* idx */, reinterpret_cast<const void**>(&data_buffer),
&data_byte_size, &data_memory_type, &data_memory_id));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestAddInput(
irequest, name, datatype, shape, dims_count));
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestAppendInputData(
irequest, name, &data_buffer[0], data_byte_size, data_memory_type,
data_memory_id));
}
return nullptr; // success
}
TRITONSERVER_Error*
BLSExecutor::PrepareInferenceOutput(
TRITONBACKEND_Request* bls_request, TRITONSERVER_InferenceRequest* irequest)
{
// Indicate the output tensors to be calculated and returned
// for the inference request.
uint32_t output_count;
RETURN_IF_ERROR(TRITONBACKEND_RequestOutputCount(bls_request, &output_count));
const char* output_name;
for (size_t count = 0; count < output_count; count++) {
RETURN_IF_ERROR(TRITONBACKEND_RequestOutputName(
bls_request, count /* index */, &output_name));
RETURN_IF_ERROR(
TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output_name));
}
return nullptr; // success
}
void
BLSExecutor::Execute(
TRITONBACKEND_Request* bls_request, TRITONBACKEND_Response** response)
{
// The names of the models that we will send internal requests on.
std::vector<std::string> model_names = {"addsub_python", "addsub_tf"};
// Check if both models are valid before executing request.
try {
for (size_t i = 0; i < 2; i++) {
// Check if the model is ready.
bool is_ready = false;
THROW_IF_TRITON_ERROR(TRITONSERVER_ServerModelIsReady(
server_, model_names[i].c_str(), -1 /* model_version */, &is_ready));
if (!is_ready) {
throw BLSBackendException(
(std::string("Failed to execute the inference request. Model '") +
model_names[i].c_str() + "' is not ready.")
.c_str());
}
// For simplicity, decoupled API is not supported in this BLS backend. You
// can implement your own backend that supports decoupled models.
uint32_t txn_flags;
THROW_IF_TRITON_ERROR(TRITONSERVER_ServerModelTransactionProperties(
server_, model_names[i].c_str(), -1 /* model_version */, &txn_flags,
nullptr /* voidp */));
if ((txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0) {
throw BLSBackendException(
std::string("Model '") + model_names[i].c_str() +
"' is using the decoupled. This BLS Backend doesn't support models "
"using the decoupled transaction policy.");
}
}
}
catch (const BLSBackendException& bls_exception) {
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what());
RESPOND_AND_SET_NULL_IF_ERROR(
response,
TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL, "Failed to send inference requests"));
return;
}
// Prepare std::future for each model. Since this BLS backend
// can handle requests in parallel, we will send all the inference
// requests first and then retrieve them later.
std::vector<std::future<TRITONSERVER_InferenceResponse*>> futures(2);
// The inference request object for sending internal requests.
TRITONSERVER_InferenceRequest* irequest = nullptr;
// For each inference request, the backend sends two requests on the
// 'addsub_python' and 'addsub_tf' models.
try {
for (size_t icount = 0; icount < 2; icount++) {
// Initialize the inference request with required information.
THROW_IF_TRITON_ERROR(
PrepareInferenceRequest(bls_request, &irequest, model_names[icount]));
THROW_IF_TRITON_ERROR(PrepareInferenceInput(bls_request, irequest));
THROW_IF_TRITON_ERROR(PrepareInferenceOutput(bls_request, irequest));
// Execute inference request.
THROW_IF_TRITON_ERROR(
model_executor_.AsyncExecute(irequest, &futures[icount]));
}
}
catch (const BLSBackendException& bls_exception) {
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what());
LOG_IF_ERROR(
TRITONSERVER_InferenceRequestDelete(irequest),
"Failed to delete inference request.");
RESPOND_AND_SET_NULL_IF_ERROR(
response,
TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL, "Failed to send inference requests"));
return;
}
// If both internal requests are sent successfully, retrieve the output from
// each request and construct the final response.
ConstructFinalResponse(response, std::move(futures));
}
void
BLSExecutor::ConstructFinalResponse(
TRITONBACKEND_Response** response,
std::vector<std::future<TRITONSERVER_InferenceResponse*>> futures)
{
// Prepare two TRITONSERVER_InferenceResponse* objects for 'addsub_python' and
// 'addsub_tf', respectively.
std::vector<TRITONSERVER_InferenceResponse*> completed_responses = {nullptr,
nullptr};
const char* output_name;
TRITONSERVER_DataType output_datatype;
const int64_t* output_shape;
uint64_t dims_count;
size_t output_byte_size;
TRITONSERVER_MemoryType output_memory_type;
int64_t output_memory_id;
const void* output_base;
void* userp;
for (size_t icount = 0; icount < 2; icount++) {
// Retrieve the corresponding TRITONSERVER_InferenceResponse object from
// 'futures'. The InferResponseComplete function sets the std::promise
// so that this thread will block until the response is returned.
completed_responses[icount] = futures[icount].get();
try {
THROW_IF_TRITON_ERROR(
TRITONSERVER_InferenceResponseError(completed_responses[icount]));
}
catch (const BLSBackendException& bls_exception) {
LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what());
if (completed_responses[icount] != nullptr) {
LOG_IF_ERROR(
TRITONSERVER_InferenceResponseDelete(completed_responses[icount]),
"Failed to delete inference response.");
}
return;
}
// Retrieve outputs from 'completed_responses'.
// Extract OUTPUT0 from the 'addsub_python' and OUTPUT1 from the
// 'addsub_tf' model to form the final inference response object.
// Get all the information about the output tensor.
RESPOND_AND_SET_NULL_IF_ERROR(
response,
TRITONSERVER_InferenceResponseOutput(
completed_responses[icount], icount, &output_name, &output_datatype,
&output_shape, &dims_count, &output_base, &output_byte_size,
&output_memory_type, &output_memory_id, &userp));
// Create an output tensor in the final response with
// the information retrieved above.
TRITONBACKEND_Output* output;
RESPOND_AND_SET_NULL_IF_ERROR(
response, TRITONBACKEND_ResponseOutput(
*response, &output, output_name, output_datatype,
output_shape, dims_count));
// Get a buffer that holds the tensor data for the output.
// We request a buffer in CPU memory but we have to handle any returned
// type. If we get back a buffer in GPU memory we just fail the request.
void* output_buffer;
output_memory_type = TRITONSERVER_MEMORY_CPU;
RESPOND_AND_SET_NULL_IF_ERROR(
response, TRITONBACKEND_OutputBuffer(
output, &output_buffer, output_byte_size,
&output_memory_type, &output_memory_id));
if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
RESPOND_AND_SET_NULL_IF_ERROR(
response, TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
"failed to create output buffer in CPU memory"));
}
// Fill the BLS output buffer with output data returned by internal
// requests.
memcpy(output_buffer, output_base, output_byte_size);
LOG_IF_ERROR(
TRITONSERVER_InferenceResponseDelete(completed_responses[icount]),
"Failed to delete inference response.");
}
}
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <future>
#include "bls_utils.h"
#include "triton/backend/backend_common.h"
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend { namespace bls {
//
// BLSExecutor
//
// Includes the custom BLS logic for this backend.
// This class shows how to utilize Triton in-process C-API to build the
// execution pipeline.
//
class BLSExecutor {
public:
BLSExecutor(TRITONSERVER_Server* server);
// Prepares the inference request that will be used internally.
TRITONSERVER_Error* PrepareInferenceRequest(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest** irequest, const std::string model_name);
// Prepares the input for the internal inference request.
TRITONSERVER_Error* PrepareInferenceInput(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest* irequest);
// Prepares the output for the internal inference request.
TRITONSERVER_Error* PrepareInferenceOutput(
TRITONBACKEND_Request* bls_request,
TRITONSERVER_InferenceRequest* irequest);
// Performs the whole BLS pipeline.
void Execute(
TRITONBACKEND_Request* bls_request, TRITONBACKEND_Response** response);
// Constructs the final response.
void ConstructFinalResponse(
TRITONBACKEND_Response** response,
std::vector<std::future<TRITONSERVER_InferenceResponse*>> futures);
private:
// The server object that encapsulates all the functionality of the Triton
// server and allows access to the Triton server API.
TRITONSERVER_Server* server_;
// The ModelExecutor object for executing inference request on a model.
ModelExecutor model_executor_;
};
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "bls_utils.h"
namespace triton { namespace backend { namespace bls {
TRITONSERVER_Error*
CPUAllocator(
TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
int64_t preferred_memory_type_id, void* userp, void** buffer,
void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
int64_t* actual_memory_type_id)
{
// For simplicity, this backend example always uses CPU memory regardless of
// the preferred memory type. You can make the actual memory type and id that
// we allocate be the same as preferred memory type. You can also provide a
// customized allocator to support different preferred_memory_type, and reuse
// memory buffer when possible.
*actual_memory_type = TRITONSERVER_MEMORY_CPU;
*actual_memory_type_id = preferred_memory_type_id;
// If 'byte_size' is zero just return 'buffer' == nullptr, we don't
// need to do any other book-keeping.
if (byte_size == 0) {
*buffer = nullptr;
*buffer_userp = nullptr;
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE, ("allocated " + std::to_string(byte_size) +
" bytes for result tensor " + tensor_name)
.c_str());
} else {
void* allocated_ptr = nullptr;
*actual_memory_type = TRITONSERVER_MEMORY_CPU;
allocated_ptr = malloc(byte_size);
// Pass the tensor name with buffer_userp so we can show it when
// releasing the buffer.
if (allocated_ptr != nullptr) {
*buffer = allocated_ptr;
*buffer_userp = new std::string(tensor_name);
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("allocated " + std::to_string(byte_size) + " bytes in " +
TRITONSERVER_MemoryTypeString(*actual_memory_type) +
" for result tensor " + tensor_name)
.c_str());
}
}
return nullptr; // Success
}
TRITONSERVER_Error*
ResponseRelease(
TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
size_t byte_size, TRITONSERVER_MemoryType memory_type,
int64_t memory_type_id)
{
std::string* name = nullptr;
if (buffer_userp != nullptr) {
name = reinterpret_cast<std::string*>(buffer_userp);
} else {
name = new std::string("<unknown>");
}
std::stringstream ss;
ss << buffer;
std::string buffer_str = ss.str();
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("Releasing buffer " + buffer_str + " of size " +
std::to_string(byte_size) + " in " +
TRITONSERVER_MemoryTypeString(memory_type) + " for result '" + *name + "'")
.c_str());
switch (memory_type) {
case TRITONSERVER_MEMORY_CPU:
free(buffer);
break;
default:
LOG_MESSAGE(
TRITONSERVER_LOG_ERROR,
std::string(
"error: unexpected buffer allocated in CUDA managed memory")
.c_str());
break;
}
delete name;
return nullptr; // Success
}
void
InferRequestComplete(
TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
if (request != nullptr) {
LOG_IF_ERROR(
TRITONSERVER_InferenceRequestDelete(request),
"Failed to delete inference request.");
}
}
void
InferResponseComplete(
TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
// The following logic only works for non-decoupled models, as a decoupled
// model may send multiple responses for a request or may not send any
// response at all. This function needs to be modified if the model uses the
// decoupled API.
if (response != nullptr) {
// Send 'response' to the future.
std::promise<TRITONSERVER_InferenceResponse*>* p =
reinterpret_cast<std::promise<TRITONSERVER_InferenceResponse*>*>(userp);
p->set_value(response);
delete p;
}
}
ModelExecutor::ModelExecutor(TRITONSERVER_Server* server) : server_(server)
{
// When triton needs a buffer to hold an output tensor, it will ask
// us to provide the buffer. In this way we can have any buffer
// management and sharing strategy that we want. To communicate to
// triton the functions that we want it to call to perform the
// allocations, we create a "response allocator" object. We pass
// this response allocate object to triton when requesting
// inference. We can reuse this response allocator object for any
// number of inference requests.
allocator_ = nullptr;
THROW_IF_TRITON_ERROR(TRITONSERVER_ResponseAllocatorNew(
&allocator_, CPUAllocator, ResponseRelease, nullptr /* start_fn */));
}
TRITONSERVER_Error*
ModelExecutor::AsyncExecute(
TRITONSERVER_InferenceRequest* irequest,
std::future<TRITONSERVER_InferenceResponse*>* future)
{
// Perform inference by calling TRITONSERVER_ServerInferAsync. This
// call is asynchronous and therefore returns immediately. The
// completion of the inference and delivery of the response is done
// by triton by calling the "response complete" callback functions
// (InferResponseComplete in this case).
auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
*future = p->get_future();
RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetResponseCallback(
irequest, allocator_, nullptr /* response_allocator_userp */,
InferResponseComplete, reinterpret_cast<void*>(p)));
RETURN_IF_ERROR(
TRITONSERVER_ServerInferAsync(server_, irequest, nullptr /* trace */));
return nullptr; // success
}
}}} // namespace triton::backend::bls
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <future>
#include <sstream>
#include "triton/backend/backend_common.h"
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend { namespace bls {
#define THROW_IF_TRITON_ERROR(X) \
do { \
TRITONSERVER_Error* tie_err__ = (X); \
if (tie_err__ != nullptr) { \
throw BLSBackendException(TRITONSERVER_ErrorMessage(tie_err__)); \
} \
} while (false)
//
// BLSBackendException
//
// Exception thrown if error occurs in BLSBackend.
//
struct BLSBackendException : std::exception {
BLSBackendException(const std::string& message) : message_(message) {}
  const char* what() const noexcept override { return message_.c_str(); }
std::string message_;
};
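// A minimal usage sketch for THROW_IF_TRITON_ERROR and BLSBackendException
// (the particular API call and the logging choice in the handler are
// illustrative only):
//
//   try {
//     bool live = false;
//     THROW_IF_TRITON_ERROR(TRITONSERVER_ServerIsLive(server, &live));
//   }
//   catch (const BLSBackendException& ex) {
//     LOG_MESSAGE(TRITONSERVER_LOG_ERROR, ex.what());
//   }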
// Performs the allocation of output tensors.
TRITONSERVER_Error* CPUAllocator(
TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
int64_t preferred_memory_type_id, void* userp, void** buffer,
void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
int64_t* actual_memory_type_id);
// Callback functions for server inference.
TRITONSERVER_Error* ResponseRelease(
TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
size_t byte_size, TRITONSERVER_MemoryType memory_type,
int64_t memory_type_id);
void InferRequestComplete(
TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp);
void InferResponseComplete(
TRITONSERVER_InferenceResponse* response, const uint32_t flags,
void* userp);
//
// ModelExecutor
//
// Execute inference request on a model.
//
class ModelExecutor {
public:
ModelExecutor(TRITONSERVER_Server* server);
  // Performs an asynchronous inference request.
TRITONSERVER_Error* AsyncExecute(
TRITONSERVER_InferenceRequest* irequest,
std::future<TRITONSERVER_InferenceResponse*>* future);
private:
// The server object that encapsulates all the functionality of the Triton
// server and allows access to the Triton server API.
TRITONSERVER_Server* server_;
// The allocator object that will be used for allocating output tensors.
TRITONSERVER_ResponseAllocator* allocator_;
};
}}} // namespace triton::backend::bls
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TUTORIALMINIMALBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TUTORIALMINIMALBACKEND_CMAKE_DIR})
if(NOT TARGET TutorialMinimalBackend::triton-minimal-backend)
include("${TUTORIALMINIMALBACKEND_CMAKE_DIR}/TutorialMinimalBackendTargets.cmake")
endif()
set(TUTORIALMINIMALBACKEND_LIBRARIES TutorialMinimalBackend::triton-minimal-backend)
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_input_collector.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"
#include "triton/backend/backend_output_responder.h"
#include "triton/core/tritonbackend.h"
namespace triton { namespace backend { namespace minimal {
//
// Minimal backend that demonstrates the TRITONBACKEND API. This
// backend works for any model that has 1 input called "IN0" with
// INT32 datatype and shape [ 4 ] and 1 output called "OUT0" with
// INT32 datatype and shape [ 4 ]. The backend supports both batching
// and non-batching models.
//
// For each batch of requests, the backend returns the input tensor
// value in the output tensor.
//
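// As a point of reference, a model served by this backend would typically be
// described by a config.pbtxt along these lines (a sketch; the model name
// and max_batch_size value are illustrative only):
//
//   name: "minimal_int32"
//   backend: "minimal"
//   max_batch_size: 8
//   input [ { name: "IN0", data_type: TYPE_INT32, dims: [ 4 ] } ]
//   output [ { name: "OUT0", data_type: TYPE_INT32, dims: [ 4 ] } ]
//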
/////////////
//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model. ModelState is derived from BackendModel class
// provided in the backend utilities that provides many common
// functions.
//
class ModelState : public BackendModel {
public:
static TRITONSERVER_Error* Create(
TRITONBACKEND_Model* triton_model, ModelState** state);
virtual ~ModelState() = default;
private:
ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model) {}
};
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
try {
*state = new ModelState(triton_model);
}
catch (const BackendModelException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
extern "C" {
// Triton calls TRITONBACKEND_ModelInitialize when a model is loaded
// to allow the backend to create any state associated with the model,
// and to also examine the model configuration to determine if the
// configuration is suitable for the backend. Any errors reported by
// this function will prevent the model from loading.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
// Create a ModelState object and associate it with the
// TRITONBACKEND_Model. If anything goes wrong with initialization
// of the model state then an error is returned and Triton will fail
// to load the model.
ModelState* model_state;
RETURN_IF_ERROR(ModelState::Create(model, &model_state));
RETURN_IF_ERROR(
TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state)));
return nullptr; // success
}
// Triton calls TRITONBACKEND_ModelFinalize when a model is no longer
// needed. The backend should cleanup any state associated with the
// model. This function will not be called until all model instances
// of the model have been finalized.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vstate);
delete model_state;
return nullptr; // success
}
} // extern "C"
/////////////
//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each
// TRITONBACKEND_ModelInstance. ModelInstanceState is derived from
// BackendModelInstance class provided in the backend utilities that
// provides many common functions.
//
class ModelInstanceState : public BackendModelInstance {
public:
static TRITONSERVER_Error* Create(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state);
virtual ~ModelInstanceState() = default;
// Get the state of the model that corresponds to this instance.
ModelState* StateForModel() const { return model_state_; }
private:
ModelInstanceState(
ModelState* model_state,
TRITONBACKEND_ModelInstance* triton_model_instance)
: BackendModelInstance(model_state, triton_model_instance),
model_state_(model_state)
{
}
ModelState* model_state_;
};
TRITONSERVER_Error*
ModelInstanceState::Create(
ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
ModelInstanceState** state)
{
try {
*state = new ModelInstanceState(model_state, triton_model_instance);
}
catch (const BackendModelInstanceException& ex) {
RETURN_ERROR_IF_TRUE(
ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
std::string("unexpected nullptr in BackendModelInstanceException"));
RETURN_IF_ERROR(ex.err_);
}
return nullptr; // success
}
extern "C" {
// Triton calls TRITONBACKEND_ModelInstanceInitialize when a model
// instance is created to allow the backend to initialize any state
// associated with the instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
// Get the model state associated with this instance's model.
TRITONBACKEND_Model* model;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model));
void* vmodelstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate));
ModelState* model_state = reinterpret_cast<ModelState*>(vmodelstate);
// Create a ModelInstanceState object and associate it with the
// TRITONBACKEND_ModelInstance.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(
ModelInstanceState::Create(model_state, instance, &instance_state));
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
instance, reinterpret_cast<void*>(instance_state)));
return nullptr; // success
}
// Triton calls TRITONBACKEND_ModelInstanceFinalize when a model
// instance is no longer needed. The backend should cleanup any state
// associated with the model instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
ModelInstanceState* instance_state =
reinterpret_cast<ModelInstanceState*>(vstate);
delete instance_state;
return nullptr; // success
}
} // extern "C"
/////////////
extern "C" {
// When Triton calls TRITONBACKEND_ModelInstanceExecute it is required
// that a backend create a response for each request in the batch. A
// response may be the output tensors required for that request or may
// be an error that is returned in the response.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
const uint32_t request_count)
{
// Triton will not call this function simultaneously for the same
// 'instance'. But since this backend could be used by multiple
// instances from multiple models the implementation needs to handle
// multiple calls to this function at the same time (with different
// 'instance' objects). Best practice for a high-performance
// implementation is to avoid introducing mutex/lock and instead use
// only function-local and model-instance-specific state.
ModelInstanceState* instance_state;
RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
instance, reinterpret_cast<void**>(&instance_state)));
ModelState* model_state = instance_state->StateForModel();
// 'responses' is initialized as a parallel array to 'requests',
// with one TRITONBACKEND_Response object for each
// TRITONBACKEND_Request object. If something goes wrong while
// creating these response objects, the backend simply returns an
// error from TRITONBACKEND_ModelInstanceExecute, indicating to
// Triton that this backend did not create or send any responses and
// so it is up to Triton to create and send an appropriate error
// response for each request. RETURN_IF_ERROR is one of several
// useful macros for error handling that can be found in
// backend_common.h.
std::vector<TRITONBACKEND_Response*> responses;
responses.reserve(request_count);
for (uint32_t r = 0; r < request_count; ++r) {
TRITONBACKEND_Request* request = requests[r];
TRITONBACKEND_Response* response;
RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
responses.push_back(response);
}
// At this point, the backend takes ownership of 'requests', which
// means that it is responsible for sending a response for every
// request. From here, even if something goes wrong in processing,
// the backend must return 'nullptr' from this function to indicate
// success. Any errors and failures must be communicated via the
// response objects.
//
// To simplify error handling, the backend utilities manage
// 'responses' in a specific way and it is recommended that backends
// follow this same pattern. When an error is detected in the
// processing of a request, an appropriate error response is sent
// and the corresponding TRITONBACKEND_Response object within
// 'responses' is set to nullptr to indicate that the
  // request/response has already been handled and no further processing
// should be performed for that request. Even if all responses fail,
// the backend still allows execution to flow to the end of the
// function. RESPOND_AND_SET_NULL_IF_ERROR, and
// RESPOND_ALL_AND_SET_NULL_IF_ERROR are macros from
// backend_common.h that assist in this management of response
// objects.
// The backend could iterate over the 'requests' and process each
// one separately. But for performance reasons it is usually
// preferred to create batched input tensors that are processed
// simultaneously. This is especially true for devices like GPUs
  // that are capable of exploiting the large amount of parallelism
// exposed by larger data sets.
//
// The backend utilities provide a "collector" to facilitate this
// batching process. The 'collector's ProcessTensor function will
// combine a tensor's value from each request in the batch into a
// single contiguous buffer. The buffer can be provided by the
// backend or 'collector' can create and manage it. In this backend,
  // there is no pre-existing buffer into which the batch should be
  // gathered, so the ProcessTensor arguments below let 'collector' create
  // and manage the buffer itself.
BackendInputCollector collector(
requests, request_count, &responses, model_state->TritonMemoryManager(),
false /* pinned_enabled */, nullptr /* stream*/);
// To instruct ProcessTensor to "gather" the entire batch of IN0
// input tensors into a single contiguous buffer in CPU memory, set
// the "allowed input types" to be the CPU ones (see tritonserver.h
// in the triton-inference-server/core repo for allowed memory
// types).
std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>> allowed_input_types =
{{TRITONSERVER_MEMORY_CPU_PINNED, 0}, {TRITONSERVER_MEMORY_CPU, 0}};
const char* input_buffer;
size_t input_buffer_byte_size;
TRITONSERVER_MemoryType input_buffer_memory_type;
int64_t input_buffer_memory_type_id;
RESPOND_ALL_AND_SET_NULL_IF_ERROR(
responses, request_count,
collector.ProcessTensor(
"IN0", nullptr /* existing_buffer */,
0 /* existing_buffer_byte_size */, allowed_input_types, &input_buffer,
&input_buffer_byte_size, &input_buffer_memory_type,
&input_buffer_memory_type_id));
// Finalize the collector. If 'true' is returned, 'input_buffer'
// will not be valid until the backend synchronizes the CUDA
// stream or event that was used when creating the collector. For
// this backend, GPU is not supported and so no CUDA sync should
// be needed; so if 'true' is returned simply log an error.
const bool need_cuda_input_sync = collector.Finalize();
if (need_cuda_input_sync) {
LOG_MESSAGE(
TRITONSERVER_LOG_ERROR,
"'minimal' backend: unexpected CUDA sync required by collector");
}
// 'input_buffer' contains the batched "IN0" tensor. The backend can
  // implement whatever logic is necessary to produce "OUT0". This
// backend simply returns the IN0 value in OUT0 so no actual
// computation is needed.
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("model ") + model_state->Name() + ": requests in batch " +
std::to_string(request_count))
.c_str());
std::string tstr;
IGNORE_ERROR(BufferAsTypedString(
tstr, input_buffer, input_buffer_byte_size, TRITONSERVER_TYPE_INT32));
LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("batched IN0 value: ") + tstr).c_str());
const char* output_buffer = input_buffer;
TRITONSERVER_MemoryType output_buffer_memory_type = input_buffer_memory_type;
int64_t output_buffer_memory_type_id = input_buffer_memory_type_id;
// This backend supports models that batch along the first dimension
// and those that don't batch. For non-batch models the output shape
// will be [ 4 ]. For batch models the output shape will be [ -1, 4
// ] and the backend "responder" utility below will set the
// appropriate batch dimension value for each response.
std::vector<int64_t> output_batch_shape;
bool supports_first_dim_batching;
RESPOND_ALL_AND_SET_NULL_IF_ERROR(
responses, request_count,
model_state->SupportsFirstDimBatching(&supports_first_dim_batching));
if (supports_first_dim_batching) {
output_batch_shape.push_back(-1);
}
output_batch_shape.push_back(4);
// Because the OUT0 values are concatenated into a single contiguous
// 'output_buffer', the backend must "scatter" them out to the
// individual response OUT0 tensors. The backend utilities provide
// a "responder" to facilitate this scattering process.
  // The 'responder's ProcessTensor function will copy the portion of
  // 'output_buffer' corresponding to each request's output into the
// response for that request.
BackendOutputResponder responder(
requests, request_count, &responses, model_state->TritonMemoryManager(),
supports_first_dim_batching, false /* pinned_enabled */,
nullptr /* stream*/);
responder.ProcessTensor(
"OUT0", TRITONSERVER_TYPE_INT32, output_batch_shape, output_buffer,
output_buffer_memory_type, output_buffer_memory_type_id);
// Finalize the responder. If 'true' is returned, the OUT0
// tensors' data will not be valid until the backend synchronizes
// the CUDA stream or event that was used when creating the
// responder. For this backend, GPU is not supported and so no
// CUDA sync should be needed; so if 'true' is returned simply log
// an error.
const bool need_cuda_output_sync = responder.Finalize();
if (need_cuda_output_sync) {
LOG_MESSAGE(
TRITONSERVER_LOG_ERROR,
"'minimal' backend: unexpected CUDA sync required by responder");
}
// Send all the responses that haven't already been sent because of
// an earlier error.
for (auto& response : responses) {
if (response != nullptr) {
LOG_IF_ERROR(
TRITONBACKEND_ResponseSend(
response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
"failed to send response");
}
}
// Done with the request objects so release them.
for (uint32_t r = 0; r < request_count; ++r) {
auto& request = requests[r];
LOG_IF_ERROR(
TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
"failed releasing request");
}
return nullptr; // success
}
} // extern "C"
}}} // namespace triton::backend::minimal
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR})
if(NOT TARGET TutorialRecommendedBackend::triton-recommended-backend)
include("${TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR}/TutorialRecommendedBackendTargets.cmake")
endif()
set(TUTORIALRECOMMENDEDBACKEND_LIBRARIES TutorialRecommendedBackend::triton-recommended-backend)
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};