Commit 0a21fff9 authored by xiabo

Adapt to 0.1.0

parent 9484fd1c
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#ifdef TRITON_ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
namespace triton { namespace common {
// Creates an NVTX range whose duration is measured by a C++ scope.
class NvtxRange {
public:
explicit NvtxRange(const char* label) { nvtxRangePushA(label); }
explicit NvtxRange(const std::string& label) : NvtxRange(label.c_str()) {}
~NvtxRange() { nvtxRangePop(); }
};
}} // namespace triton::common
#endif // TRITON_ENABLE_NVTX
//
// Macros to access NVTX functionality
//
#ifdef TRITON_ENABLE_NVTX
#define NVTX_INITIALIZE nvtxInitialize(nullptr)
#define NVTX_RANGE(V, L) triton::common::NvtxRange V(L)
#define NVTX_MARKER(L) nvtxMarkA(L)
#else
#define NVTX_INITIALIZE
#define NVTX_RANGE(V, L)
#define NVTX_MARKER(L)
#endif // TRITON_ENABLE_NVTX
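A minimal usage sketch for the macros above, assuming TRITON_ENABLE_NVTX is defined and this header is available (the header name nvtx.h used below is an assumption); when NVTX is disabled the macros expand to nothing and the annotations compile away:
#include "nvtx.h"  // assumed header name
void ProcessBatch()
{
  NVTX_RANGE(nvtx_range, "ProcessBatch");  // range pushed here, popped at scope exit
  NVTX_MARKER("batch received");           // instantaneous marker
  // ... work covered by the range ...
}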
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <condition_variable>
#include <deque>
#include <mutex>
namespace triton { namespace common {
//
// C++11 doesn't have a sync queue so we implement a simple one.
//
template <typename Item>
class SyncQueue {
public:
SyncQueue() {}
bool Empty()
{
std::lock_guard<std::mutex> lk(mu_);
return queue_.empty();
}
Item Get()
{
std::unique_lock<std::mutex> lk(mu_);
if (queue_.empty()) {
cv_.wait(lk, [this] { return !queue_.empty(); });
}
auto res = std::move(queue_.front());
queue_.pop_front();
return res;
}
void Put(const Item& value)
{
{
std::lock_guard<std::mutex> lk(mu_);
queue_.push_back(value);
}
cv_.notify_all();
}
void Put(Item&& value)
{
{
std::lock_guard<std::mutex> lk(mu_);
queue_.push_back(std::move(value));
}
cv_.notify_all();
}
private:
std::mutex mu_;
std::condition_variable cv_;
std::deque<Item> queue_;
};
}} // namespace triton::common
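A minimal producer/consumer sketch for the SyncQueue above (the header name sync_queue.h and the surrounding program are assumptions for illustration):
#include <string>
#include <thread>
#include "sync_queue.h"  // assumed header name
int main()
{
  triton::common::SyncQueue<std::string> queue;
  std::thread consumer([&queue] {
    // Get() blocks until an item is available.
    for (;;) {
      std::string item = queue.Get();
      if (item == "stop") break;
    }
  });
  queue.Put(std::string("work"));
  queue.Put(std::string("stop"));
  consumer.join();
  return 0;
}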
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
#include <sstream>
#include <string>
#include <vector>
namespace triton { namespace common {
//
// An ASCII table printer.
//
class TablePrinter {
public:
// Insert a row at the end of the table
void InsertRow(const std::vector<std::string>& row);
// Print the table
std::string PrintTable();
// TablePrinter takes ownership of `headers`.
TablePrinter(const std::vector<std::string>& headers);
private:
// Update `shares_` so that any excess space not used by a
// column is fairly allocated to the other columns
void FairShare();
// Append a row to `table`. This function handles the cases where a wrapping
// occurs.
void AddRow(std::stringstream& table, size_t row_index);
// Add a row divider
void AddRowDivider(std::stringstream& table);
// Max row width
std::vector<size_t> max_widths_;
// Max row height
std::vector<size_t> max_heights_;
// A vector of vectors of vectors containing data items for every column.
// Each record is stored as a vector of strings, where each item in the
// vector contains a single line of the record. For example, ["Item 1",
// "Item 2", "Item 3\n Item 3 line 2"] will be stored as
// [["Item 1"], ["Item 2"], ["Item 3", "Item 3 line 2"]]
std::vector<std::vector<std::vector<std::string>>> data_;
// Fair share of every column
std::vector<float> shares_;
};
}} // namespace triton::common
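A sketch of the intended call pattern for TablePrinter (the header name table_printer.h is an assumption); each inserted row supplies one string per header column:
#include <iostream>
#include "table_printer.h"  // assumed header name
int main()
{
  triton::common::TablePrinter printer({"Model", "Version", "Status"});
  printer.InsertRow({"resnet50", "1", "READY"});
  printer.InsertRow({"bert_base", "3", "UNAVAILABLE"});
  std::cout << printer.PrintTable();
  return 0;
}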
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <condition_variable>
#include <functional>
#include <queue>
#include <thread>
namespace triton { namespace common {
// Generic fixed-size Thread Pool to execute tasks asynchronously
class ThreadPool {
public:
explicit ThreadPool(std::size_t thread_count);
~ThreadPool();
ThreadPool(const ThreadPool&) = delete;
ThreadPool& operator=(const ThreadPool&) = delete;
using Task = std::function<void(void)>;
// Assigns "task" to the task queue for a worker thread to execute when
// available. This will not track the return value of the task.
void Enqueue(Task&& task);
// Returns the number of threads in thread pool
size_t Size() { return workers_.size(); }
private:
std::queue<Task> task_queue_;
std::mutex queue_mtx_;
std::condition_variable cv_;
std::vector<std::thread> workers_;
// If true, tells pool to stop accepting work and tells awake worker threads
// to exit when no tasks are left on the queue.
bool stop_ = false;
};
}} // namespace triton::common
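A fire-and-forget usage sketch for the ThreadPool above (the header name thread_pool.h is an assumption); Enqueue does not report task results, so completion is observed here with an atomic counter:
#include <atomic>
#include "thread_pool.h"  // assumed header name
void RunTasks()
{
  std::atomic<int> done{0};
  triton::common::ThreadPool pool(4 /* thread_count */);
  for (int i = 0; i < 8; ++i) {
    pool.Enqueue([&done] { done.fetch_add(1); });
  }
  // Per the 'stop_' comment above, workers exit only once the queue is
  // drained, so all eight tasks run before ~ThreadPool() completes
  // (assuming the destructor joins the worker threads).
}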
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#ifdef _WIN32
// Remove GetObject definition from windows.h, which prevents calls to
// RapidJSON's GetObject.
// https://github.com/Tencent/rapidjson/issues/1448
#undef GetObject
#include <rapidjson/document.h>
#else
// Disable class-memaccess warning to facilitate compilation with gcc>7
// https://github.com/Tencent/rapidjson/issues/1700
#pragma GCC diagnostic push
#if defined(__GNUC__) && __GNUC__ >= 8
#pragma GCC diagnostic ignored "-Wclass-memaccess"
#endif
#include <rapidjson/document.h>
#pragma GCC diagnostic pop
#endif // _WIN32
#include <rapidjson/allocators.h> // CrtAllocator (default) for Writer instantiation
#include <rapidjson/encodings.h> // UTF8 (default) for Writer instantiation
#include <rapidjson/error/en.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/rapidjson.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <string>
#include <vector>
// This header can be used both within Triton server and externally
// (i.e. in source that interacts only via TRITONSERVER or
// TRITONBACKEND API). Status is handled differently in these cases so
// the following macros must be defined before including this
// header. As an example the defines are shown here as returned by the
// TRITONSERVER API.
//
// #define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
// #define TRITONJSON_STATUSRETURN(M)
// return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
// #define TRITONJSON_STATUSSUCCESS nullptr
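//
// For standalone use outside those APIs, one possible (hypothetical) set of
// defines is a plain std::string-based status:
//
// #define TRITONJSON_STATUSTYPE std::string
// #define TRITONJSON_STATUSRETURN(M) return (M)
// #define TRITONJSON_STATUSSUCCESS std::string()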
namespace triton { namespace common {
//
// A JSON parser/writer. Currently based on rapidjson but the intent
// is to provide an abstraction for JSON functions that make it easy
// to substitute a different JSON parser. Specifically for rapidjson
// the class is also designed to provide safe access and error
// reporting to avoid the cases where rapidjson would just abort the
// entire application (!).
//
class TritonJson {
public:
class Value;
enum class ValueType {
OBJECT = rapidjson::kObjectType,
ARRAY = rapidjson::kArrayType,
};
//
// Buffer used when writing JSON representation.
//
class WriteBuffer {
public:
// Get buffer base address.
const char* Base() const { return buffer_.c_str(); }
// Get a reference to the buffer itself. Useful to efficiently
// move the contents out of the buffer.
std::string& MutableContents() { return buffer_; }
// Immutable contents.
const std::string& Contents() const { return buffer_; }
// Interface required by rapidjson::Writer
typedef char Ch;
void Put(char c) { buffer_.push_back(c); }
void Clear() { buffer_.clear(); }
void Flush() { return; }
size_t Size() const { return buffer_.size(); }
private:
std::string buffer_;
};
//
// Value representing the entire document or an element within a
// document.
//
class Value {
public:
// Empty value. Will become a top-level Document value if
// initialized by parsing or a non-top-level value if initialized
// any other way.
explicit Value() : value_(nullptr), allocator_(nullptr) {}
// Construct a top-level JSON document.
explicit Value(const ValueType type)
: document_(static_cast<rapidjson::Type>(type)), value_(nullptr),
allocator_(&document_.GetAllocator())
{
}
// Construct a non-top-level JSON value in a 'document'.
explicit Value(TritonJson::Value& document, const ValueType type)
{
allocator_ = &document.document_.GetAllocator();
value_ = new (allocator_->Malloc(sizeof(rapidjson::Value)))
rapidjson::Value(static_cast<rapidjson::Type>(type));
}
// Move constructor.
explicit Value(Value&& other) { *this = std::move(other); }
// Move assignment operator.
Value& operator=(Value&& other)
{
document_ = std::move(other.document_);
value_ = other.value_;
allocator_ = other.allocator_;
other.value_ = nullptr;
other.allocator_ = nullptr;
return *this;
}
// Parse JSON into document. Can only be called on top-level
// document value, otherwise error is returned.
TRITONJSON_STATUSTYPE Parse(const char* base, const size_t size)
{
if (value_ != nullptr) {
TRITONJSON_STATUSRETURN(
std::string("JSON parsing only available for top-level document"));
}
const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag;
document_.Parse<parseFlags>(base, size);
if (document_.HasParseError()) {
TRITONJSON_STATUSRETURN(std::string(
"failed to parse the request JSON buffer: " +
std::string(GetParseError_En(document_.GetParseError())) + " at " +
std::to_string(document_.GetErrorOffset())));
}
allocator_ = &document_.GetAllocator();
return TRITONJSON_STATUSSUCCESS;
}
// \see Parse(const char* base, const size_t size)
TRITONJSON_STATUSTYPE Parse(const std::string& json)
{
return Parse(json.data(), json.size());
}
// Write JSON representation into a 'buffer' in a compact
// format. Can only be called for a top-level document value,
// otherwise error is returned.
TRITONJSON_STATUSTYPE Write(WriteBuffer* buffer) const
{
if (value_ != nullptr) {
TRITONJSON_STATUSRETURN(
std::string("JSON writing only available for top-level document"));
}
const unsigned int writeFlags = rapidjson::kWriteNanAndInfFlag;
// Provide default template arguments to pass writeFlags
rapidjson::Writer<
WriteBuffer, rapidjson::UTF8<>, rapidjson::UTF8<>,
rapidjson::CrtAllocator, writeFlags>
writer(*buffer);
if (!document_.Accept(writer)) {
TRITONJSON_STATUSRETURN(
std::string("Failed to accept document, invalid JSON."));
}
return TRITONJSON_STATUSSUCCESS;
}
// Write JSON representation into a 'buffer' in an easy-to-read
// format. Can only be called for a top-level document value,
// otherwise error is returned.
TRITONJSON_STATUSTYPE PrettyWrite(WriteBuffer* buffer) const
{
if (value_ != nullptr) {
TRITONJSON_STATUSRETURN(
std::string("JSON writing only available for top-level document"));
}
// Can't pass writeFlags with latest release v1.1.0 of rapidjson-dev.
// We would need to build rapidjson from source to capture latest fixes.
// See this issue:
// https://github.com/Tencent/rapidjson/issues/905#issuecomment-370981353
// PrettyWrite is only used for displaying model configs currently, so
// this should not be an issue.
rapidjson::PrettyWriter<WriteBuffer> writer(*buffer);
if (!document_.Accept(writer)) {
TRITONJSON_STATUSRETURN(
std::string("Failed to accept document, invalid JSON."));
}
return TRITONJSON_STATUSSUCCESS;
}
// Swap a value with another.
TRITONJSON_STATUSTYPE Swap(TritonJson::Value& other)
{
rapidjson::Value& value = AsMutableValue();
value.Swap(other.AsMutableValue());
return TRITONJSON_STATUSSUCCESS;
}
// FIXME Should have Set* for all types.
// Set/overwrite a signed integer in a value. This changes the
// type of the value to signed int.
TRITONJSON_STATUSTYPE SetInt(const int64_t value)
{
rapidjson::Value& v = AsMutableValue();
v.SetInt64(value);
return TRITONJSON_STATUSSUCCESS;
}
// Set/overwrite a string in a value. This changes the
// type of the value to string
TRITONJSON_STATUSTYPE SetString(const std::string& value)
{
rapidjson::Value& v = AsMutableValue();
v.SetString(value.c_str(), value.length(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Set/overwrite a string member with provided name and value in this object
TRITONJSON_STATUSTYPE SetStringObject(
const char* name, const std::string& value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add/replace JSON member '") + name +
"' to non-object");
}
auto itr = object.FindMember(name);
if (itr == object.MemberEnd()) {
AddString(name, value);
} else {
object.RemoveMember(itr);
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value.c_str(), value.size(), *allocator_),
*allocator_);
}
return TRITONJSON_STATUSSUCCESS;
}
// Add an array or object as a new member to this value. 'value'
// is moved into this value and so on return 'value' should not be
// used. It is assumed that 'name' can be used by reference, it is
// the caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE Add(const char* name, TritonJson::Value&& value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
if (value.value_ == nullptr) {
rapidjson::Value v2;
v2.CopyFrom(value.document_, *allocator_);
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(), v2.Move(),
*allocator_);
} else {
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
value.value_->Move(), *allocator_);
}
value.Release();
return TRITONJSON_STATUSSUCCESS;
}
// Add a copy of a string as a new member to this value. It is
// assumed that 'name' can be used by reference, it is the
// caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE AddString(const char* name, const std::string& value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value.c_str(), value.size(), *allocator_).Move(),
*allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a copy of an explicit-length string as a new member to this
// value. It is assumed that 'name' can be used by reference, it
// is the caller's responsibility to make sure the lifetime of
// 'name' extends at least as long as the object.
TRITONJSON_STATUSTYPE AddString(
const char* name, const char* value, const size_t len)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value, len, *allocator_).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a reference to a string as a new member to this value. It
// is assumed that 'name' and 'value' can be used by reference, it
// is the caller's responsibility to make sure the lifetime of
// 'name' and 'value' extend at least as long as the object.
TRITONJSON_STATUSTYPE AddStringRef(const char* name, const char* value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::StringRef(value), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a reference to an explicit-length string as a new member to
// this value. It is assumed that 'name' and 'value' can be used
// by reference, it is the caller's responsibility to make sure
// the lifetime of 'name' and 'value' extend at least as long as
// the object.
TRITONJSON_STATUSTYPE AddStringRef(
const char* name, const char* value, const size_t len)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::StringRef(value, len), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a boolean new member to this value. It is assumed that
// 'name' can be used by reference, it is the caller's
// responsibility to make sure the lifetime of 'name' extends at
// least as long as the object.
TRITONJSON_STATUSTYPE AddBool(const char* name, const bool value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a signed integer as a new member to this value. It is
// assumed that 'name' can be used by reference, it is the
// caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE AddInt(const char* name, const int64_t value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add an unsigned integer as a new member to this value. It is
// assumed that 'name' can be used by reference, it is the
// caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE AddUInt(const char* name, const uint64_t value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a double as a new member to this value. It is assumed that
// 'name' can be used by reference, it is the caller's
// responsibility to make sure the lifetime of 'name' extends at
// least as long as the object.
TRITONJSON_STATUSTYPE AddDouble(const char* name, const double value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append an array or object to this value, which must be an
// array. 'value' is moved into this value and so on return
// 'value' should not be used.
TRITONJSON_STATUSTYPE Append(TritonJson::Value&& value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
if (value.value_ == nullptr) {
rapidjson::Value v2;
v2.CopyFrom(value.document_, *allocator_);
array.PushBack(v2.Move(), *allocator_);
} else {
array.PushBack(value.value_->Move(), *allocator_);
}
value.Release();
return TRITONJSON_STATUSSUCCESS;
}
// Append a copy of a string to this value, which must be an
// array.
TRITONJSON_STATUSTYPE AppendString(const std::string& value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(
rapidjson::Value(value.c_str(), value.size(), *allocator_).Move(),
*allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a copy of an explicit-length string to this value, which
// must be an array.
TRITONJSON_STATUSTYPE AppendString(const char* value, const size_t len)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(
rapidjson::Value(value, len, *allocator_).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a reference to a string to this value, which must be an
// array. It is assumed that 'value' can be used by reference, it
// is the caller's responsibility to make sure the lifetime of
// 'value' extends at least as long as the object.
TRITONJSON_STATUSTYPE AppendStringRef(const char* value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::StringRef(value), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a reference to an explicit-length string to this value,
// which must be an array. It is assumed that 'value' can be used
// by reference, it is the caller's responsibility to make sure
// the lifetime of 'value' extends at least as long as the object.
TRITONJSON_STATUSTYPE AppendStringRef(const char* value, const size_t len)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::StringRef(value, len), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a boolean to this value, which must be an array.
TRITONJSON_STATUSTYPE AppendBool(const bool value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a signed integer to this value, which must be an array.
TRITONJSON_STATUSTYPE AppendInt(const int64_t value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append an unsigned integer to this value, which must be an
// array.
TRITONJSON_STATUSTYPE AppendUInt(const uint64_t value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a double to this value, which must be an array.
TRITONJSON_STATUSTYPE AppendDouble(const double value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Remove member from this object
TRITONJSON_STATUSTYPE Remove(const char* name)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to remove JSON member '") + name +
"' to non-object");
}
auto itr = object.FindMember(name);
if (itr != object.MemberEnd()) {
object.RemoveMember(itr);
} // else report success
return TRITONJSON_STATUSSUCCESS;
}
// Check if this value is of the specified type. Return appropriate
// error if not.
TRITONJSON_STATUSTYPE AssertType(TritonJson::ValueType type) const
{
if (static_cast<rapidjson::Type>(type) != AsValue().GetType()) {
TRITONJSON_STATUSRETURN(std::string("unexpected type"));
}
return TRITONJSON_STATUSSUCCESS;
}
// Get the size of an array. If called on non-array returns zero.
size_t ArraySize() const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray()) {
return 0;
}
return array.GetArray().Size();
}
// Return the specified index contained in this array.
TRITONJSON_STATUSTYPE At(
const size_t idx, TritonJson::Value* value = nullptr)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
*value = TritonJson::Value(array[idx], allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get the names of all members in an object. Error if value is
// not an object.
TRITONJSON_STATUSTYPE Members(std::vector<std::string>* names) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to get members for non-object"));
}
for (const auto& m : object.GetObject()) {
names->push_back(m.name.GetString());
}
return TRITONJSON_STATUSSUCCESS;
}
// Return true if this value is an object and the named member is
// contained in this object.
bool Find(const char* name) const
{
const rapidjson::Value& object = AsValue();
return object.IsObject() && object.HasMember(name);
}
// Return true if this value is an object and the named member is
// contained in this object. Return the member in 'value'.
bool Find(const char* name, TritonJson::Value* value)
{
rapidjson::Value& object = AsMutableValue();
if (object.IsObject() && object.HasMember(name)) {
if (value != nullptr) {
*value = TritonJson::Value(object[name], allocator_);
}
return true;
}
return false;
}
// Whether the object is a null value. Note that false will also be returned
// if the object is not a JSON value.
bool IsNull() const { return ((value_ != nullptr) && value_->IsNull()); }
// Return true if the object is an object and it has no members;
// false otherwise.
bool IsEmpty() const
{
const rapidjson::Value& object = AsValue();
if (object.IsObject() && object.MemberCount() == 0) {
return true;
}
return false;
}
// Get value as a string. The string may contain null or other
// special characters and so 'len' must be used to determine length.
// Error if value is not a string.
TRITONJSON_STATUSTYPE AsString(const char** value, size_t* len) const
{
if ((value_ == nullptr) || !value_->IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
*value = value_->GetString();
*len = value_->GetStringLength();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a string. The string may contain null or other
// special characters. Error if value is not a string.
TRITONJSON_STATUSTYPE AsString(std::string* str) const
{
if ((value_ == nullptr) || !value_->IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
str->assign(value_->GetString(), value_->GetStringLength());
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a boolean. Error if value is not a boolean.
TRITONJSON_STATUSTYPE AsBool(bool* value) const
{
if ((value_ == nullptr) || !value_->IsBool()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-boolean as boolean"));
}
*value = value_->GetBool();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a signed integer. Error if value is not a signed
// integer.
TRITONJSON_STATUSTYPE AsInt(int64_t* value) const
{
if ((value_ == nullptr) || !value_->IsInt64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-signed-integer as signed-integer"));
}
*value = value_->GetInt64();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as an unsigned integer. Error if value is not an
// unsigned integer.
TRITONJSON_STATUSTYPE AsUInt(uint64_t* value) const
{
if ((value_ == nullptr) || !value_->IsUint64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-unsigned-integer as unsigned-integer"));
}
*value = value_->GetUint64();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a double. Error if value is not a double.
TRITONJSON_STATUSTYPE AsDouble(double* value) const
{
if ((value_ == nullptr) || !value_->IsNumber()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-number as double"));
}
*value = value_->GetDouble();
return TRITONJSON_STATUSSUCCESS;
}
// Get named array member contained in this object.
TRITONJSON_STATUSTYPE MemberAsArray(
const char* name, TritonJson::Value* value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
auto& v = object[name];
if (!v.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-array as array"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get named object member contained in this object.
TRITONJSON_STATUSTYPE MemberAsObject(
const char* name, TritonJson::Value* value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
auto& v = object[name];
if (!v.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-object as object"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a string. The string may contain null or other
// special characters and so 'len' must be used to determine length.
// Error if this is not an object or if the member is not a string.
TRITONJSON_STATUSTYPE MemberAsString(
const char* name, const char** value, size_t* len) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
*value = v.GetString();
*len = v.GetStringLength();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a string. The string may contain null or
// other special characters. Error if this is not an object or if
// the member is not a string.
TRITONJSON_STATUSTYPE MemberAsString(
const char* name, std::string* str) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
str->assign(v.GetString(), v.GetStringLength());
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a boolean. Error if this is not an object
// or if the member is not a boolean.
TRITONJSON_STATUSTYPE MemberAsBool(const char* name, bool* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsBool()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-boolean as boolean"));
}
*value = v.GetBool();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a signed integer. Error if this is not an object
// or if the member is not a signed integer.
TRITONJSON_STATUSTYPE MemberAsInt(const char* name, int64_t* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsInt64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-signed-integer as signed-integer"));
}
*value = v.GetInt64();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as an unsigned integer. Error if this is not an object
// or if the member is not an unsigned integer.
TRITONJSON_STATUSTYPE MemberAsUInt(const char* name, uint64_t* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsUint64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-unsigned-integer as unsigned-integer"));
}
*value = v.GetUint64();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a double. Error if this is not an object
// or if the member is not a double.
TRITONJSON_STATUSTYPE MemberAsDouble(const char* name, double* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsNumber()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-number as double"));
}
*value = v.GetDouble();
return TRITONJSON_STATUSSUCCESS;
}
// Get array element at a given index within this array.
TRITONJSON_STATUSTYPE IndexAsArray(
const size_t idx, TritonJson::Value* value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
auto& v = array[idx];
if (!v.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-array as array"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get object element at a given index within this array.
TRITONJSON_STATUSTYPE IndexAsObject(
const size_t idx, TritonJson::Value* value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
auto& v = array[idx];
if (!v.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-object as object"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a string. The string may contain null or
// other special characters and so 'len' must be used to determine
// length. Error if this is not an array or if the index element
// is not a string.
TRITONJSON_STATUSTYPE IndexAsString(
const size_t idx, const char** value, size_t* len) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
*value = v.GetString();
*len = v.GetStringLength();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a string. The string may contain null or
// other special characters. Error if this is not an array or if
// the index element is not a string.
TRITONJSON_STATUSTYPE IndexAsString(
const size_t idx, std::string* str) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
str->assign(v.GetString(), v.GetStringLength());
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a boolean. Error if this is not an array or
// if the index element is not a boolean.
TRITONJSON_STATUSTYPE IndexAsBool(const size_t idx, bool* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsBool()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-boolean as boolean"));
}
*value = v.GetBool();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a signed integer. Error if this is not an array or
// if the index element is not a signed integer.
TRITONJSON_STATUSTYPE IndexAsInt(const size_t idx, int64_t* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsInt64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-signed-integer as signed-integer"));
}
*value = v.GetInt64();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as an unsigned integer. Error if this is not an array or
// if the index element is not an unsigned integer.
TRITONJSON_STATUSTYPE IndexAsUInt(const size_t idx, uint64_t* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsUint64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-unsigned-integer as unsigned-integer"));
}
*value = v.GetUint64();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a double. Error if this is not an array or
// if the index element is not a double.
TRITONJSON_STATUSTYPE IndexAsDouble(const size_t idx, double* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsNumber()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-number as double"));
}
*value = v.GetDouble();
return TRITONJSON_STATUSSUCCESS;
}
// Release/clear a value.
void Release()
{
if (value_ != nullptr) {
allocator_->Free(value_);
}
}
private:
// Construct a non-top-level JSON value that references an
// existing element in a document.
explicit Value(
rapidjson::Value& v, rapidjson::Document::AllocatorType* allocator)
: value_(&v), allocator_(allocator)
{
}
// Return a value object that can be used for both a top-level
// document as well as an element within a document.
const rapidjson::Value& AsValue() const
{
if (value_ == nullptr) {
return document_;
}
return *value_;
}
rapidjson::Value& AsMutableValue()
{
if (value_ == nullptr) {
return document_;
}
return *value_;
}
// Whether this object is a document or a value. Based on this, only one
// of document_ or value_ is valid.
rapidjson::Document document_;
rapidjson::Value* value_;
rapidjson::Document::AllocatorType* allocator_;
};
};
}} // namespace triton::common
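A usage sketch for TritonJson, assuming the hypothetical std::string-based status defines shown earlier and a header name of triton_json.h; the returned statuses are ignored here for brevity:
#define TRITONJSON_STATUSTYPE std::string
#define TRITONJSON_STATUSRETURN(M) return (M)
#define TRITONJSON_STATUSSUCCESS std::string()
#include "triton_json.h"  // assumed header name

#include <iostream>
#include <string>

int main()
{
  namespace tc = triton::common;

  // Parse a top-level document and read a member.
  tc::TritonJson::Value doc;
  doc.Parse(std::string(R"({"name": "resnet50", "max_batch_size": 8})"));
  std::string name;
  doc.MemberAsString("name", &name);

  // Build a document and write it out in compact form.
  tc::TritonJson::Value out(tc::TritonJson::ValueType::OBJECT);
  out.AddString("model", name);
  out.AddInt("batch", 8);
  tc::TritonJson::WriteBuffer buffer;
  out.Write(&buffer);
  std::cout << buffer.Contents() << std::endl;
  return 0;
}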
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
import "model_config.proto";
//@@
//@@.. cpp:var:: service InferenceService
//@@
//@@ Inference Server GRPC endpoints.
//@@
service GRPCInferenceService
{
//@@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns
//@@ (ServerLiveResponse)
//@@
//@@ Check liveness of the inference server.
//@@
rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {}
//@@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns
//@@ (ServerReadyResponse)
//@@
//@@ Check readiness of the inference server.
//@@
rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {}
//@@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns
//@@ (ModelReadyResponse)
//@@
//@@ Check readiness of a model in the inference server.
//@@
rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {}
//@@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns
//@@ (ServerMetadataResponse)
//@@
//@@ Get server metadata.
//@@
rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {}
//@@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns
//@@ (ModelMetadataResponse)
//@@
//@@ Get model metadata.
//@@
rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}
//@@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns
//@@ (ModelInferResponse)
//@@
//@@ Perform inference using a specific model.
//@@
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
//@@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns
//@@ (stream ModelStreamInferResponse)
//@@
//@@ Perform streaming inference.
//@@
rpc ModelStreamInfer(stream ModelInferRequest)
returns (stream ModelStreamInferResponse)
{
}
//@@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns
//@@ (ModelConfigResponse)
//@@
//@@ Get model configuration.
//@@
rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {}
//@@ .. cpp:var:: rpc ModelStatistics(
//@@ ModelStatisticsRequest)
//@@ returns (ModelStatisticsResponse)
//@@
//@@ Get the cumulative inference statistics for a model.
//@@
rpc ModelStatistics(ModelStatisticsRequest) returns (ModelStatisticsResponse)
{
}
//@@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns
//@@ (RepositoryIndexResponse)
//@@
//@@ Get the index of model repository contents.
//@@
rpc RepositoryIndex(RepositoryIndexRequest) returns (RepositoryIndexResponse)
{
}
//@@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns
//@@ (RepositoryModelLoadResponse)
//@@
//@@ Load or reload a model from a repository.
//@@
rpc RepositoryModelLoad(RepositoryModelLoadRequest)
returns (RepositoryModelLoadResponse)
{
}
//@@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
//@@ returns (RepositoryModelUnloadResponse)
//@@
//@@ Unload a model.
//@@
rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
returns (RepositoryModelUnloadResponse)
{
}
//@@ .. cpp:var:: rpc SystemSharedMemoryStatus(
//@@ SystemSharedMemoryStatusRequest)
//@@ returns (SystemSharedMemoryStatusResponse)
//@@
//@@ Get the status of all registered system-shared-memory regions.
//@@
rpc SystemSharedMemoryStatus(SystemSharedMemoryStatusRequest)
returns (SystemSharedMemoryStatusResponse)
{
}
//@@ .. cpp:var:: rpc SystemSharedMemoryRegister(
//@@ SystemSharedMemoryRegisterRequest)
//@@ returns (SystemSharedMemoryRegisterResponse)
//@@
//@@ Register a system-shared-memory region.
//@@
rpc SystemSharedMemoryRegister(SystemSharedMemoryRegisterRequest)
returns (SystemSharedMemoryRegisterResponse)
{
}
//@@ .. cpp:var:: rpc SystemSharedMemoryUnregister(
//@@ SystemSharedMemoryUnregisterRequest)
//@@ returns (SystemSharedMemoryUnregisterResponse)
//@@
//@@ Unregister a system-shared-memory region.
//@@
rpc SystemSharedMemoryUnregister(SystemSharedMemoryUnregisterRequest)
returns (SystemSharedMemoryUnregisterResponse)
{
}
//@@ .. cpp:var:: rpc CudaSharedMemoryStatus(
//@@ CudaSharedMemoryStatusRequest)
//@@ returns (CudaSharedMemoryStatusResponse)
//@@
//@@ Get the status of all registered CUDA-shared-memory regions.
//@@
rpc CudaSharedMemoryStatus(CudaSharedMemoryStatusRequest)
returns (CudaSharedMemoryStatusResponse)
{
}
//@@ .. cpp:var:: rpc CudaSharedMemoryRegister(
//@@ CudaSharedMemoryRegisterRequest)
//@@ returns (CudaSharedMemoryRegisterResponse)
//@@
//@@ Register a CUDA-shared-memory region.
//@@
rpc CudaSharedMemoryRegister(CudaSharedMemoryRegisterRequest)
returns (CudaSharedMemoryRegisterResponse)
{
}
//@@ .. cpp:var:: rpc CudaSharedMemoryUnregister(
//@@ CudaSharedMemoryUnregisterRequest)
//@@ returns (CudaSharedMemoryUnregisterResponse)
//@@
//@@ Unregister a CUDA-shared-memory region.
//@@
rpc CudaSharedMemoryUnregister(CudaSharedMemoryUnregisterRequest)
returns (CudaSharedMemoryUnregisterResponse)
{
}
//@@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest)
//@@ returns (TraceSettingResponse)
//@@
//@@ Update and get the trace setting of the Triton server.
//@@
rpc TraceSetting(TraceSettingRequest) returns (TraceSettingResponse)
{
}
//@@ .. cpp:var:: rpc LogSettings(LogSettingsRequest)
//@@ returns (LogSettingsResponse)
//@@
//@@ Update and get the log settings of the Triton server.
//@@
rpc LogSettings(LogSettingsRequest) returns (LogSettingsResponse)
{
}
}
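// A hedged client-side sketch: calling ServerLive from C++ using the stubs
// that protoc and the gRPC plugin generate from this file. The generated
// header name (grpc_service.grpc.pb.h) and the use of Triton's default gRPC
// port 8001 are assumptions for illustration:
//
//   #include <grpcpp/grpcpp.h>
//   #include "grpc_service.grpc.pb.h"
//
//   auto channel = grpc::CreateChannel(
//       "localhost:8001", grpc::InsecureChannelCredentials());
//   auto stub = inference::GRPCInferenceService::NewStub(channel);
//
//   inference::ServerLiveRequest request;
//   inference::ServerLiveResponse response;
//   grpc::ClientContext context;
//   grpc::Status status = stub->ServerLive(&context, request, &response);
//   // On success, response.live() reports server liveness.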
//@@
//@@.. cpp:var:: message ServerLiveRequest
//@@
//@@ Request message for ServerLive.
//@@
message ServerLiveRequest {}
//@@
//@@.. cpp:var:: message ServerLiveResponse
//@@
//@@ Response message for ServerLive.
//@@
message ServerLiveResponse
{
//@@
//@@ .. cpp:var:: bool live
//@@
//@@ True if the inference server is live, false if not live.
//@@
bool live = 1;
}
//@@
//@@.. cpp:var:: message ServerReadyRequest
//@@
//@@ Request message for ServerReady.
//@@
message ServerReadyRequest {}
//@@
//@@.. cpp:var:: message ServerReadyResponse
//@@
//@@ Response message for ServerReady.
//@@
message ServerReadyResponse
{
//@@
//@@ .. cpp:var:: bool ready
//@@
//@@ True if the inference server is ready, false if not ready.
//@@
bool ready = 1;
}
//@@
//@@.. cpp:var:: message ModelReadyRequest
//@@
//@@ Request message for ModelReady.
//@@
message ModelReadyRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model to check for readiness.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model to check for readiness. If not given the
//@@ server will choose a version based on the model and internal policy.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelReadyResponse
//@@
//@@ Response message for ModelReady.
//@@
message ModelReadyResponse
{
//@@
//@@ .. cpp:var:: bool ready
//@@
//@@ True if the model is ready, false if not ready.
//@@
bool ready = 1;
}
//@@
//@@.. cpp:var:: message ServerMetadataRequest
//@@
//@@ Request message for ServerMetadata.
//@@
message ServerMetadataRequest {}
//@@
//@@.. cpp:var:: message ServerMetadataResponse
//@@
//@@ Response message for ServerMetadata.
//@@
message ServerMetadataResponse
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The server name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string version
//@@
//@@ The server version.
//@@
string version = 2;
//@@
//@@ .. cpp:var:: string extensions (repeated)
//@@
//@@ The extensions supported by the server.
//@@
repeated string extensions = 3;
}
//@@
//@@.. cpp:var:: message ModelMetadataRequest
//@@
//@@ Request message for ModelMetadata.
//@@
message ModelMetadataRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model to get metadata for. If not
//@@ given the server will choose a version based on the
//@@ model and internal policy.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelMetadataResponse
//@@
//@@ Response message for ModelMetadata.
//@@
message ModelMetadataResponse
{
//@@
//@@ .. cpp:var:: message TensorMetadata
//@@
//@@ Metadata for a tensor.
//@@
message TensorMetadata
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape. A variable-size dimension is represented
//@@ by a -1 value.
//@@
repeated int64 shape = 3;
}
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The model name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string versions (repeated)
//@@
//@@ The versions of the model.
//@@
repeated string versions = 2;
//@@
//@@ .. cpp:var:: string platform
//@@
//@@ The model's platform.
//@@
string platform = 3;
//@@
//@@ .. cpp:var:: TensorMetadata inputs (repeated)
//@@
//@@ The model's inputs.
//@@
repeated TensorMetadata inputs = 4;
//@@
//@@ .. cpp:var:: TensorMetadata outputs (repeated)
//@@
//@@ The model's outputs.
//@@
repeated TensorMetadata outputs = 5;
}
//@@
//@@.. cpp:var:: message InferParameter
//@@
//@@ An inference parameter value.
//@@
message InferParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a string, an int64 or
//@@ a boolean
//@@
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64 int64_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
}
}
//@@
//@@.. cpp:var:: message InferTensorContents
//@@
//@@ The data contained in a tensor represented by the repeated type
//@@ that matches the tensor's data type. Protobuf oneof is not used
//@@ because oneofs cannot contain repeated fields.
//@@
message InferTensorContents
{
//@@
//@@ .. cpp:var:: bool bool_contents (repeated)
//@@
//@@ Representation for BOOL data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated bool bool_contents = 1;
//@@
//@@ .. cpp:var:: int32 int_contents (repeated)
//@@
//@@ Representation for INT8, INT16, and INT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated int32 int_contents = 2;
//@@
//@@ .. cpp:var:: int64 int64_contents (repeated)
//@@
//@@ Representation for INT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated int64 int64_contents = 3;
//@@
//@@ .. cpp:var:: uint32 uint_contents (repeated)
//@@
//@@ Representation for UINT8, UINT16, and UINT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated uint32 uint_contents = 4;
//@@
//@@ .. cpp:var:: uint64 uint64_contents (repeated)
//@@
//@@ Representation for UINT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated uint64 uint64_contents = 5;
//@@
//@@ .. cpp:var:: float fp32_contents (repeated)
//@@
//@@ Representation for FP32 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated float fp32_contents = 6;
//@@
//@@ .. cpp:var:: double fp64_contents (repeated)
//@@
//@@ Representation for FP64 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated double fp64_contents = 7;
//@@
//@@ .. cpp:var:: bytes bytes_contents (repeated)
//@@
//@@ Representation for BYTES data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated bytes bytes_contents = 8;
}
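//
// Illustrative sketch (not part of the service definition): a 2x3 FP32
// tensor with rows [1, 2, 3] and [4, 5, 6] would be carried in
// 'fp32_contents' in flattened, one-dimensional, row-major order, e.g. in
// protobuf text format:
//
//   fp32_contents: [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ]
//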
//@@
//@@.. cpp:var:: message ModelInferRequest
//@@
//@@ Request message for ModelInfer.
//@@
message ModelInferRequest
{
//@@
//@@ .. cpp:var:: message InferInputTensor
//@@
//@@ An input tensor for an inference request.
//@@
message InferInputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated int64 shape = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference input tensor parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferRequest.raw_input_contents.
//@@
InferTensorContents contents = 5;
}
//@@
//@@ .. cpp:var:: message InferRequestedOutputTensor
//@@
//@@ An output tensor requested for an inference request.
//@@
message InferRequestedOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional requested output tensor parameters.
//@@
map<string, InferParameter> parameters = 2;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to use for inferencing.
//@@
string model_name = 1;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model to use for inference. If not
//@@ given the latest/most-recent version of the model is used.
//@@
string model_version = 2;
//@@ .. cpp:var:: string id
//@@
//@@ Optional identifier for the request. If specified will be
//@@ returned in the response.
//@@
string id = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@
//@@ .. cpp:var:: InferInputTensor inputs (repeated)
//@@
//@@ The input tensors for the inference.
//@@
repeated InferInputTensor inputs = 5;
//@@
//@@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated)
//@@
//@@ The requested output tensors for the inference. Optional, if not
//@@ specified all outputs specified in the model config will be
//@@ returned.
//@@
repeated InferRequestedOutputTensor outputs = 6;
//@@
//@@ .. cpp:var:: bytes raw_input_contents
//@@
//@@ The data contained in an input tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_input_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'inputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferInputTensor::contents
//@@ must not be specified for any input tensor.
//@@
repeated bytes raw_input_contents = 7;
}
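//
// Illustrative sketch (not part of the service definition): a minimal
// ModelInferRequest in protobuf text format. The model and tensor names
// ("simple", "INPUT0", "OUTPUT0") are hypothetical. Because 'contents' is
// used here, 'raw_input_contents' must not also be set.
//
//   model_name: "simple"
//   model_version: "1"
//   id: "request-0"
//   inputs [
//     {
//       name: "INPUT0"
//       datatype: "FP32"
//       shape: [ 1, 4 ]
//       contents { fp32_contents: [ 0.5, 1.5, 2.5, 3.5 ] }
//     }
//   ]
//   outputs [ { name: "OUTPUT0" } ]
//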
//@@
//@@.. cpp:var:: message ModelInferResponse
//@@
//@@ Response message for ModelInfer.
//@@
message ModelInferResponse
{
//@@
//@@ .. cpp:var:: message InferOutputTensor
//@@
//@@ An output tensor returned for an inference request.
//@@
message InferOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated int64 shape = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional output tensor parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferResponse.raw_output_contents.
//@@
InferTensorContents contents = 5;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model used for inference.
//@@
string model_name = 1;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model used for inference.
//@@
string model_version = 2;
//@@ .. cpp:var:: string id
//@@
//@@ The id of the inference request if one was specified.
//@@
string id = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference response parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@
//@@ .. cpp:var:: InferOutputTensor outputs (repeated)
//@@
//@@ The output tensors holding inference results.
//@@
repeated InferOutputTensor outputs = 5;
//@@
//@@ .. cpp:var:: bytes raw_output_contents
//@@
//@@ The data contained in an output tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_output_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'outputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferOutputTensor::contents
//@@ must not be specified for any output tensor.
//@@
repeated bytes raw_output_contents = 6;
}
//@@
//@@.. cpp:var:: message ModelStreamInferResponse
//@@
//@@ Response message for ModelStreamInfer.
//@@
message ModelStreamInferResponse
{
//@@
//@@ .. cpp:var:: string error_message
//@@
//@@ The message describing the error. The empty message
//@@ indicates the inference was successful without errors.
//@@
string error_message = 1;
//@@
//@@ .. cpp:var:: ModelInferResponse infer_response
//@@
//@@ Holds the results of the request.
//@@
ModelInferResponse infer_response = 2;
}
//@@
//@@.. cpp:var:: message ModelConfigRequest
//@@
//@@ Request message for ModelConfig.
//@@
message ModelConfigRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model. If not given the model version
//@@ is selected automatically based on the version policy.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelConfigResponse
//@@
//@@ Response message for ModelConfig.
//@@
message ModelConfigResponse
{
//@@
//@@ .. cpp:var:: ModelConfig config
//@@
//@@ The model configuration.
//@@
ModelConfig config = 1;
}
//@@
//@@.. cpp:var:: message ModelStatisticsRequest
//@@
//@@ Request message for ModelStatistics.
//@@
message ModelStatisticsRequest
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model. If not given returns statistics for
//@@ all models.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model. If not given returns statistics for
//@@ all model versions.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message StatisticDuration
//@@
//@@ Statistic recording a cumulative duration metric.
//@@
message StatisticDuration
{
//@@ .. cpp:var:: uint64 count
//@@
//@@ Cumulative number of times this metric occurred.
//@@
uint64 count = 1;
//@@ .. cpp:var:: uint64 ns
//@@
//@@ Total collected duration of this metric in nanoseconds.
//@@
uint64 ns = 2;
}
//@@
//@@.. cpp:var:: message InferStatistics
//@@
//@@ Inference statistics.
//@@
message InferStatistics
{
//@@ .. cpp:var:: StatisticDuration success
//@@
//@@ Cumulative count and duration for successful inference
//@@ requests. The "success" count and cumulative duration include
//@@ cache hits.
//@@
StatisticDuration success = 1;
//@@ .. cpp:var:: StatisticDuration fail
//@@
//@@ Cumulative count and duration for failed inference
//@@ requests.
//@@
StatisticDuration fail = 2;
//@@ .. cpp:var:: StatisticDuration queue
//@@
//@@ The count and cumulative duration that inference requests wait in
//@@ scheduling or other queues. The "queue" count and cumulative
//@@ duration include cache hits.
//@@
StatisticDuration queue = 3;
//@@ .. cpp:var:: StatisticDuration compute_input
//@@
//@@ The count and cumulative duration to prepare input tensor data as
//@@ required by the model framework / backend. For example, this duration
//@@ should include the time to copy input tensor data to the GPU.
//@@ The "compute_input" count and cumulative duration do not account for
//@@ requests that were a cache hit. See the "cache_hit" field for more
//@@ info.
//@@
StatisticDuration compute_input = 4;
//@@ .. cpp:var:: StatisticDuration compute_infer
//@@
//@@ The count and cumulative duration to execute the model.
//@@ The "compute_infer" count and cumulative duration do not account for
//@@ requests that were a cache hit. See the "cache_hit" field for more
//@@ info.
//@@
StatisticDuration compute_infer = 5;
//@@ .. cpp:var:: StatisticDuration compute_output
//@@
//@@ The count and cumulative duration to extract output tensor data
//@@ produced by the model framework / backend. For example, this duration
//@@ should include the time to copy output tensor data from the GPU.
//@@ The "compute_output" count and cumulative duration do not account for
//@@ requests that were a cache hit. See the "cache_hit" field for more
//@@ info.
//@@
StatisticDuration compute_output = 6;
//@@ .. cpp:var:: StatisticDuration cache_hit
//@@
//@@ The count of response cache hits and cumulative duration to lookup
//@@ and extract output tensor data from the Response Cache on a cache
//@@ hit. For example, this duration should include the time to copy
//@@ output tensor data from the Response Cache to the response object.
//@@ On cache hits, triton does not need to go to the model/backend
//@@ for the output tensor data, so the "compute_input", "compute_infer",
//@@ and "compute_output" fields are not updated. Assuming the response
//@@ cache is enabled for a given model, a cache hit occurs for a
//@@ request to that model when the request metadata (model name,
//@@ model version, model inputs) hashes to an existing entry in the
//@@ cache. On a cache miss, the request hash and response output tensor
//@@ data is added to the cache. See response cache docs for more info:
//@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
//@@
StatisticDuration cache_hit = 7;
//@@ .. cpp:var:: StatisticDuration cache_miss
//@@
//@@ The count of response cache misses and cumulative duration to lookup
//@@ and insert output tensor data from the computed response to the cache.
//@@ For example, this duration should include the time to copy
//@@ output tensor data from the response object to the Response Cache.
//@@ Assuming the response cache is enabled for a given model, a cache
//@@ miss occurs for a request to that model when the request metadata
//@@ does NOT hash to an existing entry in the cache. See the response
//@@ cache docs for more info:
//@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
//@@
StatisticDuration cache_miss = 8;
}
//@@
//@@.. cpp:var:: message InferBatchStatistics
//@@
//@@ Inference batch statistics.
//@@
message InferBatchStatistics
{
//@@ .. cpp:var:: uint64 batch_size
//@@
//@@ The size of the batch.
//@@
uint64 batch_size = 1;
//@@ .. cpp:var:: StatisticDuration compute_input
//@@
//@@ The count and cumulative duration to prepare input tensor data as
//@@ required by the model framework / backend with the given batch size.
//@@ For example, this duration should include the time to copy input
//@@ tensor data to the GPU.
//@@
StatisticDuration compute_input = 2;
//@@ .. cpp:var:: StatisticDuration compute_infer
//@@
//@@ The count and cumulative duration to execute the model with the given
//@@ batch size.
//@@
StatisticDuration compute_infer = 3;
//@@ .. cpp:var:: StatisticDuration compute_output
//@@
//@@ The count and cumulative duration to extract output tensor data
//@@ produced by the model framework / backend with the given batch size.
//@@ For example, this duration should include the time to copy output
//@@ tensor data from the GPU.
//@@
StatisticDuration compute_output = 4;
}
//@@
//@@.. cpp:var:: message ModelStatistics
//@@
//@@ Statistics for a specific model and version.
//@@
message ModelStatistics
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model.
//@@
string version = 2;
//@@ .. cpp:var:: uint64 last_inference
//@@
//@@ The timestamp of the last inference request made for this model,
//@@ as milliseconds since the epoch.
//@@
uint64 last_inference = 3;
//@@ .. cpp:var:: uint64 inference_count
//@@
//@@ The cumulative count of successful inference requests made for this
//@@ model. Each inference in a batched request is counted as an
//@@ individual inference. For example, if a client sends a single
//@@ inference request with batch size 64, "inference_count" will be
//@@ incremented by 64. Similarly, if a client sends 64 individual
//@@ requests each with batch size 1, "inference_count" will be
//@@ incremented by 64. The "inference_count" value DOES NOT include
//@@ cache hits.
//@@
uint64 inference_count = 4;
//@@ .. cpp:var:: uint64 execution_count
//@@
//@@ The cumulative count of the number of successful inference executions
//@@ performed for the model. When dynamic batching is enabled, a single
//@@ model execution can perform inferencing for more than one inference
//@@ request. For example, if a client sends 64 individual requests each
//@@ with batch size 1 and the dynamic batcher batches them into a single
//@@ large batch for model execution then "execution_count" will be
//@@ incremented by 1. If, on the other hand, the dynamic batcher is
//@@ not enabled for that model and each of the 64 individual requests
//@@ is executed independently, then "execution_count" will be
//@@ incremented by 64.
//@@ The "execution_count" value DOES NOT include cache hits.
//@@
uint64 execution_count = 5;
//@@ .. cpp:var:: InferStatistics inference_stats
//@@
//@@ The aggregate statistics for the model/version.
//@@
InferStatistics inference_stats = 6;
//@@ .. cpp:var:: InferBatchStatistics batch_stats (repeated)
//@@
//@@ The aggregate statistics for each different batch size that is
//@@ executed in the model. The batch statistics indicate how many actual
//@@ model executions were performed and show differences due to different
//@@ batch size (for example, larger batches typically take longer to
//@@ compute).
//@@
repeated InferBatchStatistics batch_stats = 7;
}
//@@
//@@.. cpp:var:: message ModelStatisticsResponse
//@@
//@@ Response message for ModelStatistics.
//@@
message ModelStatisticsResponse
{
//@@ .. cpp:var:: ModelStatistics model_stats (repeated)
//@@
//@@ Statistics for each requested model.
//@@
repeated ModelStatistics model_stats = 1;
}
//@@
//@@.. cpp:var:: message ModelRepositoryParameter
//@@
//@@ A model repository parameter value.
//@@
message ModelRepositoryParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a string, an int64 or
//@@ a boolean
//@@
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64 int64_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
//@@ .. cpp:var:: bytes bytes_param
//@@
//@@ A bytes parameter value.
//@@
bytes bytes_param = 4;
}
}
//@@
//@@.. cpp:var:: message RepositoryIndexRequest
//@@
//@@ Request message for RepositoryIndex.
//@@
message RepositoryIndexRequest
{
//@@ .. cpp:var:: string repository_name
//@@
//@@ The name of the repository. If empty the index is returned
//@@ for all repositories.
//@@
string repository_name = 1;
//@@ .. cpp:var:: bool ready
//@@
//@@ If true, only models currently ready for inferencing are returned.
//@@
bool ready = 2;
}
//@@
//@@.. cpp:var:: message RepositoryIndexResponse
//@@
//@@ Response message for RepositoryIndex.
//@@
message RepositoryIndexResponse
{
//@@
//@@ .. cpp:var:: message ModelIndex
//@@
//@@ Index entry for a model.
//@@
message ModelIndex
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model.
//@@
string version = 2;
//@@
//@@ .. cpp:var:: string state
//@@
//@@ The state of the model.
//@@
string state = 3;
//@@
//@@ .. cpp:var:: string reason
//@@
//@@ The reason, if any, that the model is in the given state.
//@@
string reason = 4;
}
//@@
//@@ .. cpp:var:: ModelIndex models (repeated)
//@@
//@@ An index entry for each model.
//@@
repeated ModelIndex models = 1;
}
//@@
//@@.. cpp:var:: message RepositoryModelLoadRequest
//@@
//@@ Request message for RepositoryModelLoad.
//@@
message RepositoryModelLoadRequest
{
//@@ .. cpp:var:: string repository_name
//@@
//@@ The name of the repository to load from. If empty the model
//@@ is loaded from any repository.
//@@
string repository_name = 1;
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to load, or reload.
//@@
string model_name = 2;
//@@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters
//@@
//@@ Optional model repository request parameters.
//@@
map<string, ModelRepositoryParameter> parameters = 3;
}
//@@
//@@.. cpp:var:: message RepositoryModelLoadResponse
//@@
//@@ Response message for RepositoryModelLoad.
//@@
message RepositoryModelLoadResponse {}
//@@
//@@.. cpp:var:: message RepositoryModelUnloadRequest
//@@
//@@ Request message for RepositoryModelUnload.
//@@
message RepositoryModelUnloadRequest
{
//@@ .. cpp:var:: string repository_name
//@@
//@@ The name of the repository from which the model was originally
//@@ loaded. If empty the repository is not considered.
//@@
string repository_name = 1;
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to unload.
//@@
string model_name = 2;
//@@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters
//@@
//@@ Optional model repository request parameters.
//@@
map<string, ModelRepositoryParameter> parameters = 3;
}
//@@
//@@.. cpp:var:: message RepositoryModelUnloadResponse
//@@
//@@ Response message for RepositoryModelUnload.
//@@
message RepositoryModelUnloadResponse {}
//@@
//@@.. cpp:var:: message SystemSharedMemoryStatusRequest
//@@
//@@ Request message for SystemSharedMemoryStatus.
//@@
message SystemSharedMemoryStatusRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to get status for. If empty the
//@@ status is returned for all registered regions.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message SystemSharedMemoryStatusResponse
//@@
//@@ Response message for SystemSharedMemoryStatus.
//@@
message SystemSharedMemoryStatusResponse
{
//@@
//@@ .. cpp:var:: message RegionStatus
//@@
//@@ Status for a shared memory region.
//@@
message RegionStatus
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name for the shared memory region.
//@@
string name = 1;
//@@ .. cpp:var:: string key
//@@
//@@ The key of the underlying memory object that contains the
//@@ shared memory region.
//@@
string key = 2;
//@@ .. cpp:var:: uint64 offset
//@@
//@@ Offset, in bytes, within the underlying memory object to
//@@ the start of the shared memory region.
//@@
uint64 offset = 3;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory region, in bytes.
//@@
uint64 byte_size = 4;
}
//@@
//@@ .. cpp:var:: map<string,RegionStatus> regions
//@@
//@@ Status for each of the registered regions, indexed by
//@@ region name.
//@@
map<string, RegionStatus> regions = 1;
}
//@@
//@@.. cpp:var:: message SystemSharedMemoryRegisterRequest
//@@
//@@ Request message for SystemSharedMemoryRegister.
//@@
message SystemSharedMemoryRegisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to register.
//@@
string name = 1;
//@@ .. cpp:var:: string key
//@@
//@@ The key of the underlying memory object that contains the
//@@ shared memory region.
//@@
string key = 2;
//@@ .. cpp:var:: uint64 offset
//@@
//@@ Offset, in bytes, within the underlying memory object to
//@@ the start of the shared memory region.
//@@
uint64 offset = 3;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory region, in bytes.
//@@
uint64 byte_size = 4;
}
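//
// Illustrative sketch (not part of the service definition): registering a
// 16KB region starting at the beginning of a hypothetical shared-memory
// object "/triton_input_shm", in protobuf text format. The region and key
// names are assumptions for the example.
//
//   name: "input_region"
//   key: "/triton_input_shm"
//   offset: 0
//   byte_size: 16384
//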
//@@
//@@.. cpp:var:: message SystemSharedMemoryRegisterResponse
//@@
//@@ Response message for SystemSharedMemoryRegister.
//@@
message SystemSharedMemoryRegisterResponse {}
//@@
//@@.. cpp:var:: message SystemSharedMemoryUnregisterRequest
//@@
//@@ Request message for SystemSharedMemoryUnregister.
//@@
message SystemSharedMemoryUnregisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the system region to unregister. If empty
//@@ all system shared-memory regions are unregistered.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message SystemSharedMemoryUnregisterResponse
//@@
//@@ Response message for SystemSharedMemoryUnregister.
//@@
message SystemSharedMemoryUnregisterResponse {}
//@@
//@@.. cpp:var:: message CudaSharedMemoryStatusRequest
//@@
//@@ Request message for CudaSharedMemoryStatus.
//@@
message CudaSharedMemoryStatusRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to get status for. If empty the
//@@ status is returned for all registered regions.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryStatusResponse
//@@
//@@ Response message for CudaSharedMemoryStatus.
//@@
message CudaSharedMemoryStatusResponse
{
//@@
//@@ .. cpp:var:: message RegionStatus
//@@
//@@ Status for a shared memory region.
//@@
message RegionStatus
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name for the shared memory region.
//@@
string name = 1;
//@@ .. cpp:var:: uint64 device_id
//@@
//@@ The GPU device ID where the cudaIPC handle was created.
//@@
uint64 device_id = 2;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory region, in bytes.
//@@
uint64 byte_size = 3;
}
//@@
//@@ .. cpp:var:: map<string,RegionStatus> regions
//@@
//@@ Status for each of the registered regions, indexed by
//@@ region name.
//@@
map<string, RegionStatus> regions = 1;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryRegisterRequest
//@@
//@@ Request message for CudaSharedMemoryRegister.
//@@
message CudaSharedMemoryRegisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to register.
//@@
string name = 1;
//@@ .. cpp:var:: bytes raw_handle
//@@
//@@ The raw serialized cudaIPC handle.
//@@
bytes raw_handle = 2;
//@@ .. cpp:var:: int64 device_id
//@@
//@@ The GPU device ID on which the cudaIPC handle was created.
//@@
int64 device_id = 3;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory block, in bytes.
//@@
uint64 byte_size = 4;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryRegisterResponse
//@@
//@@ Response message for CudaSharedMemoryRegister.
//@@
message CudaSharedMemoryRegisterResponse {}
//@@
//@@.. cpp:var:: message CudaSharedMemoryUnregisterRequest
//@@
//@@ Request message for CudaSharedMemoryUnregister.
//@@
message CudaSharedMemoryUnregisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the cuda region to unregister. If empty
//@@ all cuda shared-memory regions are unregistered.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryUnregisterResponse
//@@
//@@ Response message for CudaSharedMemoryUnregister.
//@@
message CudaSharedMemoryUnregisterResponse {}
//@@
//@@.. cpp:var:: message TraceSettingRequest
//@@
//@@ Request message for TraceSetting.
//@@
message TraceSettingRequest
{
//@@
//@@ .. cpp:var:: message SettingValue
//@@
//@@ The values to be associated with a trace setting.
//@@ If no value is provided, the setting will be cleared and
//@@ the global setting value will be used.
//@@
message SettingValue
{
//@@
//@@ .. cpp:var:: string value (repeated)
//@@
//@@ The value.
//@@
repeated string value = 1;
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The new setting values to be updated; settings that are
//@@ not specified will remain unchanged.
//@@
map<string, SettingValue> settings = 1;
//@@
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to apply the new trace settings to.
//@@ If not given, the new settings will be applied globally.
//@@
string model_name = 2;
}
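//
// Illustrative sketch (not part of the service definition): updating a
// single trace setting for one model, in protobuf text format. The setting
// key "trace_rate" and the model name "simple" are assumptions for the
// example.
//
//   settings {
//     key: "trace_rate"
//     value { value: "1000" }
//   }
//   model_name: "simple"
//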
//@@
//@@.. cpp:var:: message TraceSettingResponse
//@@
//@@ Response message for TraceSetting.
//@@
message TraceSettingResponse
{
//@@
//@@ .. cpp:var:: message SettingValue
//@@
//@@ The values to be associated with a trace setting.
//@@
message SettingValue
{
//@@
//@@ .. cpp:var:: string value (repeated)
//@@
//@@ The value.
//@@
repeated string value = 1;
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The current trace settings, including any changes specified
//@@ by TraceSettingRequest.
//@@
map<string, SettingValue> settings = 1;
}
//@@
//@@.. cpp:var:: message LogSettingsRequest
//@@
//@@ Request message for LogSettings.
//@@
message LogSettingsRequest
{
message SettingValue
{
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: uint32 uint32_param
//@@
//@@ A uint32 parameter value.
//@@
uint32 uint32_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
}
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The new log setting values to be updated; settings that are
//@@ not specified will remain unchanged.
//@@
map<string, SettingValue> settings = 1;
}
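//
// Illustrative sketch (not part of the service definition): raising the
// verbose logging level, in protobuf text format. The setting key
// "log_verbose_level" is an assumption for the example.
//
//   settings {
//     key: "log_verbose_level"
//     value { uint32_param: 1 }
//   }
//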
//@@
//@@.. cpp:var:: message LogSettingsResponse
//@@
//@@ Response message for LogSettings.
//@@
message LogSettingsResponse
{
message SettingValue
{
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: uint32 uint32_param
//@@
//@@ A uint32 parameter value.
//@@
uint32 uint32_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
}
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The current log settings.
//@@
map<string, SettingValue> settings = 1;
}
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2018, TensorFlow Authors. All rights reserved.
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
//@@
//@@.. cpp:enum:: DataType
//@@
//@@ Data types supported for input and output tensors.
//@@
enum DataType {
//@@ .. cpp:enumerator:: DataType::INVALID = 0
TYPE_INVALID = 0;
//@@ .. cpp:enumerator:: DataType::BOOL = 1
TYPE_BOOL = 1;
//@@ .. cpp:enumerator:: DataType::UINT8 = 2
TYPE_UINT8 = 2;
//@@ .. cpp:enumerator:: DataType::UINT16 = 3
TYPE_UINT16 = 3;
//@@ .. cpp:enumerator:: DataType::UINT32 = 4
TYPE_UINT32 = 4;
//@@ .. cpp:enumerator:: DataType::UINT64 = 5
TYPE_UINT64 = 5;
//@@ .. cpp:enumerator:: DataType::INT8 = 6
TYPE_INT8 = 6;
//@@ .. cpp:enumerator:: DataType::INT16 = 7
TYPE_INT16 = 7;
//@@ .. cpp:enumerator:: DataType::INT32 = 8
TYPE_INT32 = 8;
//@@ .. cpp:enumerator:: DataType::INT64 = 9
TYPE_INT64 = 9;
//@@ .. cpp:enumerator:: DataType::FP16 = 10
TYPE_FP16 = 10;
//@@ .. cpp:enumerator:: DataType::FP32 = 11
TYPE_FP32 = 11;
//@@ .. cpp:enumerator:: DataType::FP64 = 12
TYPE_FP64 = 12;
//@@ .. cpp:enumerator:: DataType::STRING = 13
TYPE_STRING = 13;
//@@ .. cpp:enumerator:: DataType::BF16 = 14
TYPE_BF16 = 14;
}
//@@
//@@ .. cpp:var:: message ModelRateLimiter
//@@
//@@ The specifications required by the rate limiter to properly
//@@ schedule the inference requests across the different models
//@@ and their instances.
//@@
message ModelRateLimiter
{
//@@ .. cpp:var:: message Resource
//@@
//@@ The resource property.
//@@
message Resource
{
//@@ .. cpp:var:: string name
//@@
//@@ The name associated with the resource.
//@@
string name = 1;
//@@ .. cpp:var:: bool global
//@@
//@@ Whether or not the resource is global. If true then the resource
//@@ is assumed to be shared among the devices; otherwise the specified
//@@ count of the resource is assumed for each device associated
//@@ with the instance.
//@@
bool global = 2;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of resources required for the execution of the model
//@@ instance.
//@@
uint32 count = 3;
}
//@@ .. cpp:var:: Resource resources (repeated)
//@@
//@@ The resources required to execute the request on a model instance.
//@@ Resources are just names with a corresponding count. The execution
//@@ of the instance will be blocked until the specified resources are
//@@ available. By default an instance uses no rate-limiter resources.
//@@
repeated Resource resources = 1;
//@@ .. cpp:var:: uint32 priority
//@@
//@@ The optional weighting value to be used for prioritizing across
//@@ instances. An instance with priority 2 will be given 1/2 the
//@@ number of scheduling chances as an instance_group with priority
//@@ 1. The default priority is 1. The priority of value 0 will be
//@@ treated as priority 1.
//@@
uint32 priority = 2;
}
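//
// Illustrative sketch (not part of the model configuration schema): a
// ModelRateLimiter as it might appear inside an instance group entry, in
// protobuf text format. The resource names "R1" and "R2" are hypothetical.
// An instance of this group executes only when 4 units of "R1" on its
// device and 2 units of the global resource "R2" are available, and it
// receives half the scheduling chances of a priority-1 group.
//
//   rate_limiter {
//     resources [
//       { name: "R1" count: 4 },
//       { name: "R2" global: true count: 2 }
//     ]
//     priority: 2
//   }
//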
//@@
//@@.. cpp:var:: message ModelInstanceGroup
//@@
//@@ A group of one or more instances of a model and resources made
//@@ available for those instances.
//@@
message ModelInstanceGroup
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ Kind of this instance group.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0
//@@
//@@ This instance group represents instances that can run on either
//@@ CPU or GPU. If all GPUs listed in 'gpus' are available then
//@@ instances will be created on GPU(s), otherwise instances will
//@@ be created on CPU.
//@@
KIND_AUTO = 0;
//@@ .. cpp:enumerator:: Kind::KIND_GPU = 1
//@@
//@@ This instance group represents instances that must run on the
//@@ GPU.
//@@
KIND_GPU = 1;
//@@ .. cpp:enumerator:: Kind::KIND_CPU = 2
//@@
//@@ This instance group represents instances that must run on the
//@@ CPU.
//@@
KIND_CPU = 2;
//@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3
//@@
//@@ This instance group represents instances that should run on the
//@@ CPU and/or GPU(s) as specified by the model or backend itself.
//@@ The inference server will not override the model/backend
//@@ settings.
//@@
KIND_MODEL = 3;
}
//@@
//@@ .. cpp:var:: message SecondaryDevice
//@@
//@@ A secondary device required for a model instance.
//@@
message SecondaryDevice
{
//@@
//@@ .. cpp:enum:: SecondaryDeviceKind
//@@
//@@ The kind of the secondary device.
//@@
enum SecondaryDeviceKind {
//@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
//@@
//@@ An NVDLA core. http://nvdla.org
//@@ Currently KIND_NVDLA is only supported by the TensorRT backend.
//@@
KIND_NVDLA = 0;
}
//@@ .. cpp:var:: SecondaryDeviceKind kind
//@@
//@@ The secondary device kind.
//@@
SecondaryDeviceKind kind = 1;
//@@ .. cpp:var:: int64 device_id
//@@
//@@ Identifier for the secondary device.
//@@
int64 device_id = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ Optional name of this group of instances. If not specified the
//@@ name will be formed as <model name>_<group number>. The name of
//@@ individual instances will be further formed by a unique instance
//@@ number and GPU index.
//@@
string name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this instance group. Default is KIND_AUTO. If
//@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpus' are valid and
//@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
//@@ and 'gpus' cannot be specified.
//@@
Kind kind = 4;
//@@ .. cpp:var:: int32 count
//@@
//@@ For a group assigned to GPU, the number of instances created for
//@@ each GPU listed in 'gpus'. For a group assigned to CPU the number
//@@ of instances created. Default is 1.
//@@
int32 count = 2;
//@@ .. cpp:var:: ModelRateLimiter rate_limiter
//@@
//@@ The rate limiter specific settings to be associated with this
//@@ instance group. Optional, if not specified no rate limiting
//@@ will be applied to this instance group.
//@@
ModelRateLimiter rate_limiter = 6;
//@@ .. cpp:var:: int32 gpus (repeated)
//@@
//@@ GPU(s) where instances should be available. For each GPU listed,
//@@ 'count' instances of the model will be available. Setting 'gpus'
//@@ to empty (or not specifying at all) is equivalent to listing all
//@@ available GPUs.
//@@
repeated int32 gpus = 3;
//@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated)
//@@
//@@ Secondary devices that are required by instances specified by this
//@@ instance group. Optional.
//@@
repeated SecondaryDevice secondary_devices = 8;
//@@ .. cpp:var:: string profile (repeated)
//@@
//@@ For TensorRT models containing multiple optimization profiles, this
//@@ parameter specifies a set of optimization profiles available to this
//@@ instance group. The inference server will choose the optimal profile
//@@ based on the shapes of the input tensors. This field should lie
//@@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
//@@ and be specified only for TensorRT backend, otherwise an error will
//@@ be generated. If not specified, the server will select the first
//@@ optimization profile by default.
//@@
repeated string profile = 5;
//@@ .. cpp:var:: bool passive
//@@
//@@ Whether the instances within this instance group will be accepting
//@@ inference requests from the scheduler. If true, the instances will
//@@ not be added to the scheduler. Default value is false.
//@@
bool passive = 7;
//@@ .. cpp:var:: string host_policy
//@@
//@@ The host policy name that the instance is to be associated with.
//@@ The default value is set to reflect the device kind of the instance,
//@@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
//@@ KIND_GPU is "gpu_<gpu_id>".
//@@
string host_policy = 9;
}
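//
// Illustrative sketch (not part of the model configuration schema): an
// instance group as it might appear in a model's config.pbtxt, assuming the
// enclosing ModelConfig field is named 'instance_group'. Two instances are
// created on each of GPUs 0 and 1.
//
//   instance_group [
//     {
//       name: "my_model_gpu"
//       kind: KIND_GPU
//       count: 2
//       gpus: [ 0, 1 ]
//     }
//   ]
//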
//@@
//@@.. cpp:var:: message ModelTensorReshape
//@@
//@@ Reshape specification for input and output tensors.
//@@
message ModelTensorReshape
{
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The shape to use for reshaping.
//@@
repeated int64 shape = 1;
}
//@@
//@@.. cpp:var:: message ModelInput
//@@
//@@ An input required by the model.
//@@
message ModelInput
{
//@@
//@@ .. cpp:enum:: Format
//@@
//@@ The format for the input.
//@@
enum Format {
//@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0
//@@
//@@ The input has no specific format. This is the default.
//@@
FORMAT_NONE = 0;
//@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1
//@@
//@@ HWC image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NHWC = 1;
//@@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2
//@@
//@@ CHW image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NCHW = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the input.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: Format format
//@@
//@@ The format of the input. Optional.
//@@
Format format = 3;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the input tensor that must be provided
//@@ when invoking the inference API for this model.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape expected for this input by the backend. The input will
//@@ be reshaped to this before being presented to the backend. The
//@@ reshape must have the same number of elements as the input shape
//@@ specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the input is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
//@@ .. cpp:var:: bool allow_ragged_batch
//@@
//@@ Whether or not the input is allowed to be "ragged" in a dynamically
//@@ created batch. Default is false indicating that two requests will
//@@ only be batched if this tensor has the same shape in both requests.
//@@ True indicates that two requests can be batched even if this tensor
//@@ has a different shape in each request.
//@@
bool allow_ragged_batch = 7;
//@@ .. cpp:var:: bool optional
//@@
//@@ Whether or not the input is optional for the model execution.
//@@ If true, the input is not required in the inference request.
//@@ Default value is false.
//@@
bool optional = 8;
}
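//
// Illustrative sketch (not part of the model configuration schema): an
// input declared with API dims [ 4 ] but reshaped to the [ 1, 4 ] shape
// expected by the backend, assuming the enclosing ModelConfig field is
// named 'input'. Both shapes hold the same number of elements, as required
// by 'reshape'. The tensor name is hypothetical.
//
//   input [
//     {
//       name: "INPUT0"
//       data_type: TYPE_FP32
//       dims: [ 4 ]
//       reshape: { shape: [ 1, 4 ] }
//     }
//   ]
//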
//@@
//@@.. cpp:var:: message ModelOutput
//@@
//@@ An output produced by the model.
//@@
message ModelOutput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the output.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the output.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the output tensor.
//@@
repeated int64 dims = 3;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape produced for this output by the backend. The output will
//@@ be reshaped from this to the shape specified in 'dims' before being
//@@ returned in the inference response. The reshape must have the same
//@@ number of elements as the output shape specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: string label_filename
//@@
//@@ The label file associated with this output. Should be specified only
//@@ for outputs that represent classifications. Optional.
//@@
string label_filename = 4;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the output is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
}
//@@ .. cpp:var:: message BatchInput
//@@
//@@ A batch input is an additional input that must be added by
//@@ the backend based on all the requests in a batch.
//@@
message BatchInput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch input.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
//@@
//@@ The element count of the 'source_input' will be added as
//@@ input with shape [1].
//@@
BATCH_ELEMENT_COUNT = 0;
//@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1]. For example, if there is a
//@@ batch of two requests, each with 2 elements, an input of value
//@@ 2 will be added to the first request, and an input of value
//@@ 4 will be added to the second request.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT = 1;
//@@ .. cpp:enumerator::
//@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1], except for the first request
//@@ in the batch. For the first request in the batch, the input
//@@ will have shape [2] where the first element is value 0.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2;
//@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
//@@
//@@ Among the requests in the batch, the max element count of the
//@@ 'source_input' will be added as input with shape
//@@ [max_element_count] for the first request in the batch.
//@@ For other requests, such input will be with shape [0].
//@@ The data of the tensor will be uninitialized.
//@@
BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with shape
//@@ [batch_size, len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]].
//@@
BATCH_ITEM_SHAPE = 4;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with single dimensional
//@@ shape [batch_size * len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [6] and value [3, 1, 3, 1, 2, 2].
//@@
BATCH_ITEM_SHAPE_FLATTEN = 5;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch input.
//@@
Kind kind = 1;
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the model inputs that the backend will create
//@@ for this batch input.
//@@
repeated string target_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The input's datatype. The data type can be TYPE_INT32 or
//@@ TYPE_FP32.
//@@
DataType data_type = 3;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives the value for each batch input from one or
//@@ more other inputs. 'source_input' gives the names of those
//@@ inputs.
//@@
repeated string source_input = 4;
}
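//
// Illustrative sketch (not part of the model configuration schema): a batch
// input that provides the backend with the accumulated element count of a
// hypothetical ragged input "INPUT0", assuming the enclosing ModelConfig
// field is named 'batch_input'.
//
//   batch_input [
//     {
//       kind: BATCH_ACCUMULATED_ELEMENT_COUNT
//       target_name: "INPUT0_ACC_COUNT"
//       data_type: TYPE_FP32
//       source_input: "INPUT0"
//     }
//   ]
//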
//@@.. cpp:var:: message BatchOutput
//@@
//@@ A batch output is an output produced by the model that must be handled
//@@ differently by the backend based on all the requests in a batch.
//@@
message BatchOutput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch output.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
//@@
//@@ The output should be scattered according to the shape of
//@@ 'source_input'. The dynamic dimension of the output will
//@@ be set to the value of the same dimension in the input.
//@@
BATCH_SCATTER_WITH_INPUT_SHAPE = 0;
}
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the outputs to be produced by this batch output
//@@ specification.
//@@
repeated string target_name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch output.
//@@
Kind kind = 2;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives each batch output from one or more inputs.
//@@ 'source_input' gives the names of those inputs.
//@@
repeated string source_input = 3;
}
//@@
//@@.. cpp:var:: message ModelVersionPolicy
//@@
//@@ Policy indicating which versions of a model should be made
//@@ available by the inference server.
//@@
message ModelVersionPolicy
{
//@@ .. cpp:var:: message Latest
//@@
//@@ Serve only the latest version(s) of a model. This is
//@@ the default policy.
//@@
message Latest
{
//@@ .. cpp:var:: uint32 num_versions
//@@
//@@ Serve only the 'num_versions' highest-numbered versions. The
//@@ default value of 'num_versions' is 1, indicating that by
//@@ default only the single highest-numbered version of a
//@@ model will be served.
//@@
uint32 num_versions = 1;
}
//@@ .. cpp:var:: message All
//@@
//@@ Serve all versions of the model.
//@@
message All {}
//@@ .. cpp:var:: message Specific
//@@
//@@ Serve only specific versions of the model.
//@@
message Specific
{
//@@ .. cpp:var:: int64 versions (repeated)
//@@
//@@ The specific versions of the model that will be served.
//@@
repeated int64 versions = 1;
}
//@@ .. cpp:var:: oneof policy_choice
//@@
//@@ Each model must implement only a single version policy. The
//@@ default policy is 'Latest'.
//@@
oneof policy_choice
{
//@@ .. cpp:var:: Latest latest
//@@
//@@ Serve only latest version(s) of the model.
//@@
Latest latest = 1;
//@@ .. cpp:var:: All all
//@@
//@@ Serve all versions of the model.
//@@
All all = 2;
//@@ .. cpp:var:: Specific specific
//@@
//@@ Serve only specific version(s) of the model.
//@@
Specific specific = 3;
}
}
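//
// Illustrative sketch (not part of the model configuration schema): serving
// only versions 1 and 3 of a model, assuming the enclosing ModelConfig
// field is named 'version_policy'.
//
//   version_policy { specific { versions: [ 1, 3 ] } }
//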
//@@
//@@.. cpp:var:: message ModelOptimizationPolicy
//@@
//@@ Optimization settings for a model. These settings control if/how a
//@@ model is optimized and prioritized by the backend framework when
//@@ it is loaded.
//@@
message ModelOptimizationPolicy
{
//@@
//@@ .. cpp:var:: message Graph
//@@
//@@ Enable generic graph optimization of the model. If not specified
//@@ the framework's default level of optimization is used. Supported
//@@ for TensorFlow graphdef and savedmodel models and for ONNX models.
//@@ For TensorFlow this causes XLA to be enabled/disabled for the model.
//@@ For ONNX the default enables all optimizations, -1 enables only
//@@ basic optimizations, and +1 enables only basic and extended
//@@ optimizations.
//@@
message Graph
{
//@@ .. cpp:var:: int32 level
//@@
//@@ The optimization level. Defaults to 0 (zero) if not specified.
//@@
//@@ - -1: Disabled
//@@ - 0: Framework default
//@@ - 1+: Enable optimization level (greater values indicate
//@@ higher optimization levels)
//@@
int32 level = 1;
}
//@@
//@@ .. cpp:enum:: ModelPriority
//@@
//@@ Model priorities. A model will be given scheduling and execution
//@@ preference over models at lower priorities. Current model
//@@ priorities only work for TensorRT models.
//@@
enum ModelPriority {
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
//@@
//@@ The default model priority.
//@@
PRIORITY_DEFAULT = 0;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
//@@
//@@ The maximum model priority.
//@@
PRIORITY_MAX = 1;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
//@@
//@@ The minimum model priority.
//@@
PRIORITY_MIN = 2;
}
//@@
//@@ .. cpp:var:: message Cuda
//@@
//@@ CUDA-specific optimization settings.
//@@
message Cuda
{
//@@ .. cpp:var:: message GraphSpec
//@@
//@@ Specification of the CUDA graph to be captured.
//@@
message GraphSpec
{
//@@ .. cpp:var:: message Shape
//@@
//@@ Specification of tensor dimensions.
//@@
message Shape
{
//@@ .. cpp:var:: int64 dim (repeated)
//@@
//@@ The dimension.
//@@
repeated int64 dim = 1;
}
message LowerBound
{
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to a value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of
//@@ the input without batching dimension.
//@@
map<string, Shape> input = 2;
}
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to a value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of the
//@@ input without batching dimension.
//@@
map<string, Shape> input = 2;
//@@ .. cpp:var:: LowerBound graph_lower_bound
//@@
//@@ Specify the lower bound of the CUDA graph. Optional.
//@@ If specified, the graph can be used for input shapes and
//@@ batch sizes that are in closed interval between the lower
//@@ bound specification and graph specification. For dynamic
//@@ shape model, this allows CUDA graphs to be launched
//@@ frequently without capturing all possible shape combinations.
//@@ However, using graph for shape combinations different from
//@@ the one used for capturing introduces uninitialized data for
//@@ execution and it may distort the inference result if
//@@ the model is sensitive to uninitialized data.
//@@
LowerBound graph_lower_bound = 3;
}
//@@ .. cpp:var:: bool graphs
//@@
//@@ Use CUDA graphs API to capture model operations and execute
//@@ them more efficiently. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool graphs = 1;
//@@ .. cpp:var:: bool busy_wait_events
//@@
//@@ Use busy-waiting to synchronize CUDA events to achieve minimum
//@@ latency from event complete to host thread to be notified, with
//@@ the cost of high CPU load. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool busy_wait_events = 2;
//@@ .. cpp:var:: GraphSpec graph_spec (repeated)
//@@
//@@ Specification of the CUDA graph to be captured. If not specified
//@@ and 'graphs' is true, the default CUDA graphs will be captured
//@@ based on model settings.
//@@ Currently only recognized by TensorRT backend.
//@@
repeated GraphSpec graph_spec = 3;
//@@ .. cpp:var:: bool output_copy_stream
//@@
//@@ Uses a CUDA stream separate from the inference stream to copy the
//@@ output to host. However, be aware that setting this option to
//@@ true will lead to an increase in the memory consumption of the
//@@ model as Triton will allocate twice as much GPU memory for its
//@@ I/O tensor buffers. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool output_copy_stream = 4;
}
//@@
//@@ .. cpp:var:: message ExecutionAccelerators
//@@
//@@ Specify the preferred execution accelerators to be used to execute
//@@ the model. Currently only recognized by ONNX Runtime backend and
//@@ TensorFlow backend.
//@@
//@@ For ONNX Runtime backend, it will deploy the model with the execution
//@@ accelerators by priority, the priority is determined based on the
//@@ order that they are set, i.e. the provider at the front has the highest
//@@ priority. Overall, the priority will be in the following order:
//@@ <gpu_execution_accelerator> (if instance is on GPU)
//@@ CUDA Execution Provider (if instance is on GPU)
//@@ <cpu_execution_accelerator>
//@@ Default CPU Execution Provider
//@@
message ExecutionAccelerators
{
//@@
//@@ .. cpp:var:: message Accelerator
//@@
//@@ Specify the accelerator to be used to execute the model.
//@@ Accelerator with the same name may accept different parameters
//@@ depending on the backends.
//@@
message Accelerator
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the execution accelerator.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ Additional parameters used to configure the accelerator.
//@@
map<string, string> parameters = 2;
}
//@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on GPU.
//@@
//@@ For ONNX Runtime backend, possible value is "tensorrt" as name,
//@@ and no parameters are required.
//@@
//@@ For TensorFlow backend, possible values are "tensorrt",
//@@ "auto_mixed_precision", "gpu_io".
//@@
//@@ For "tensorrt", the following parameters can be specified:
//@@ "precision_mode": The precision used for optimization.
//@@ Allowed values are "FP32" and "FP16". Default value is "FP32".
//@@
//@@ "max_cached_engines": The maximum number of cached TensorRT
//@@ engines in dynamic TensorRT ops. Default value is 100.
//@@
//@@ "minimum_segment_size": The smallest model subgraph that will
//@@ be considered for optimization by TensorRT. Default value is 3.
//@@
//@@ "max_workspace_size_bytes": The maximum GPU memory the model
//@@ can use temporarily during execution. Default value is 1GB.
//@@
//@@ For "auto_mixed_precision", no parameters are required. If set,
//@@ the model will try to use FP16 for better performance.
//@@ This optimization cannot be set with "tensorrt".
//@@
//@@ For "gpu_io", no parameters are required. If set, the model will
//@@ be executed using TensorFlow Callable API to set input and output
//@@ tensors in GPU memory if possible, which can reduce data transfer
//@@ overhead if the model is used in ensemble. However, the Callable
//@@ object will be created on model creation and it will request all
//@@ outputs for every model execution, which may impact the
//@@ performance if a request does not require all outputs. This
//@@ optimization will only take effect if the model instance is
//@@ created with KIND_GPU.
//@@
repeated Accelerator gpu_execution_accelerator = 1;
//@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on CPU.
//@@
//@@ For ONNX Runtime backend, possible value is "openvino" as name,
//@@ and no parameters are required.
//@@
repeated Accelerator cpu_execution_accelerator = 2;
}
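//
// For illustration, a config.pbtxt sketch (parameter values are examples
// only) requesting the TensorRT execution accelerator with FP16 precision
// for GPU instances, as described above:
//
//   optimization {
//     execution_accelerators {
//       gpu_execution_accelerator [
//         {
//           name: "tensorrt"
//           parameters { key: "precision_mode" value: "FP16" }
//         }
//       ]
//     }
//   }
//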
//@@
//@@ .. cpp:var:: message PinnedMemoryBuffer
//@@
//@@ Specify whether to use a pinned memory buffer when transferring data
//@@  between non-pinned system memory and GPU memory. Using a pinned
//@@  memory buffer for these system/GPU transfers typically provides
//@@  increased performance. For example, in the common use case where the
//@@ request provides inputs and delivers outputs via non-pinned system
//@@ memory, if the model instance accepts GPU IOs, the inputs will be
//@@ processed by two copies: from non-pinned system memory to pinned
//@@ memory, and from pinned memory to GPU memory. Similarly, pinned
//@@ memory will be used for delivering the outputs.
//@@
message PinnedMemoryBuffer
{
//@@ .. cpp:var:: bool enable
//@@
//@@ Use pinned memory buffer. Default is true.
//@@
bool enable = 1;
}
//@@ .. cpp:var:: Graph graph
//@@
//@@ The graph optimization setting for the model. Optional.
//@@
Graph graph = 1;
//@@ .. cpp:var:: ModelPriority priority
//@@
//@@ The priority setting for the model. Optional.
//@@
ModelPriority priority = 2;
//@@ .. cpp:var:: Cuda cuda
//@@
//@@ CUDA-specific optimization settings. Optional.
//@@
Cuda cuda = 3;
//@@ .. cpp:var:: ExecutionAccelerators execution_accelerators
//@@
//@@ The accelerators used for the model. Optional.
//@@
ExecutionAccelerators execution_accelerators = 4;
//@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for inputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer input_pinned_memory = 5;
//@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for outputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer output_pinned_memory = 6;
//@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold
//@@
//@@ The backend may use a gather kernel to gather input data if the
//@@ device has direct access to the source buffer and the destination
//@@  buffer. In such a case, the gather kernel will be used only if the
//@@  number of buffers to be gathered is greater than or equal to
//@@  the specified value. If 0, the gather kernel will be disabled.
//@@ Default value is 0.
//@@ Currently only recognized by TensorRT backend.
//@@
uint32 gather_kernel_buffer_threshold = 7;
//@@ .. cpp:var:: bool eager_batching
//@@
//@@ Start preparing the next batch before the model instance is ready
//@@ for the next inference. This option can be used to overlap the
//@@ batch preparation with model execution, with the trade-off that
//@@ the next batch might be smaller than what it could have been.
//@@ Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool eager_batching = 8;
}
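//
// For illustration, a sketch of other ModelOptimizationPolicy fields in
// config.pbtxt (values are examples only) that disables the input pinned
// memory buffer and enables the gather kernel for 8 or more buffers:
//
//   optimization {
//     input_pinned_memory { enable: false }
//     gather_kernel_buffer_threshold: 8
//   }
//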
//@@
//@@.. cpp:var:: message ModelQueuePolicy
//@@
//@@ Queue policy for inference requests.
//@@
message ModelQueuePolicy
{
//@@
//@@ .. cpp:enum:: TimeoutAction
//@@
//@@ The action applied to timed-out requests.
//@@
enum TimeoutAction {
//@@ .. cpp:enumerator:: Action::REJECT = 0
//@@
//@@ Reject the request and return error message accordingly.
//@@
REJECT = 0;
//@@ .. cpp:enumerator:: Action::DELAY = 1
//@@
//@@ Delay the request until all other requests at the same
//@@ (or higher) priority levels that have not reached their timeouts
//@@ are processed. A delayed request will eventually be processed,
//@@ but may be delayed indefinitely due to newly arriving requests.
//@@
DELAY = 1;
}
//@@
//@@ .. cpp:var:: TimeoutAction timeout_action
//@@
//@@  The action applied to a timed-out request.
//@@ The default action is REJECT.
//@@
TimeoutAction timeout_action = 1;
//@@
//@@ .. cpp:var:: uint64 default_timeout_microseconds
//@@
//@@ The default timeout for every request, in microseconds.
//@@ The default value is 0 which indicates that no timeout is set.
//@@
uint64 default_timeout_microseconds = 2;
//@@
//@@ .. cpp:var:: bool allow_timeout_override
//@@
//@@  Whether an individual request can override the default timeout value.
//@@ When true, individual requests can set a timeout that is less than
//@@ the default timeout value but may not increase the timeout.
//@@ The default value is false.
//@@
bool allow_timeout_override = 3;
//@@
//@@ .. cpp:var:: uint32 max_queue_size
//@@
//@@ The maximum queue size for holding requests. A request will be
//@@ rejected immediately if it can't be enqueued because the queue is
//@@ full. The default value is 0 which indicates that no maximum
//@@ queue size is enforced.
//@@
uint32 max_queue_size = 4;
}
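//
// For illustration, a queue policy sketch (values are examples only), shown
// here as the 'default_queue_policy' of the dynamic batcher defined below:
//
//   dynamic_batching {
//     default_queue_policy {
//       timeout_action: DELAY
//       default_timeout_microseconds: 100000
//       allow_timeout_override: true
//       max_queue_size: 16
//     }
//   }
//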
//@@
//@@.. cpp:var:: message ModelDynamicBatching
//@@
//@@ Dynamic batching configuration. These settings control how dynamic
//@@ batching operates for the model.
//@@
message ModelDynamicBatching
{
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching. If a batch of one of
//@@  these sizes can be formed, it will be executed immediately. If
//@@  not specified, a preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 1;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a request will be delayed in
//@@ the scheduling queue to wait for additional requests for
//@@ batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 2;
//@@ .. cpp:var:: bool preserve_ordering
//@@
//@@ Should the dynamic batcher preserve the ordering of responses to
//@@ match the order of requests received by the scheduler. Default is
//@@ false. If true, the responses will be returned in the same order as
//@@ the order of requests sent to the scheduler. If false, the responses
//@@ may be returned in arbitrary order. This option is specifically
//@@ needed when a sequence of related inference requests (i.e. inference
//@@  requests with the same correlation ID) is sent to the dynamic
//@@ batcher to ensure that the sequence responses are in the correct
//@@ order.
//@@
bool preserve_ordering = 3;
//@@ .. cpp:var:: uint32 priority_levels
//@@
//@@  The number of priority levels to be enabled for the model. Priority
//@@  levels start from 1, with 1 being the highest priority.
//@@ Requests are handled in priority order with all priority 1 requests
//@@ processed before priority 2, all priority 2 requests processed before
//@@ priority 3, etc. Requests with the same priority level will be
//@@ handled in the order that they are received.
//@@
uint32 priority_levels = 4;
//@@ .. cpp:var:: uint32 default_priority_level
//@@
//@@ The priority level used for requests that don't specify their
//@@ priority. The value must be in the range [ 1, 'priority_levels' ].
//@@
uint32 default_priority_level = 5;
//@@ .. cpp:var:: ModelQueuePolicy default_queue_policy
//@@
//@@ The default queue policy used for requests that don't require
//@@ priority handling and requests that specify priority levels where
//@@ there is no specific policy given. If not specified, a policy with
//@@ default field values will be used.
//@@
ModelQueuePolicy default_queue_policy = 6;
//@@ .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
//@@
//@@ Specify the queue policy for the priority level. The default queue
//@@ policy will be used if a priority level doesn't specify a queue
//@@ policy.
//@@
map<uint32, ModelQueuePolicy> priority_queue_policy = 7;
}
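//
// For illustration, a minimal dynamic batching sketch in config.pbtxt
// (sizes and delay are examples only):
//
//   dynamic_batching {
//     preferred_batch_size: [ 4, 8 ]
//     max_queue_delay_microseconds: 100
//     preserve_ordering: true
//   }
//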
//@@
//@@.. cpp:var:: message ModelSequenceBatching
//@@
//@@ Sequence batching configuration. These settings control how sequence
//@@ batching operates for the model.
//@@
message ModelSequenceBatching
{
//@@ .. cpp:var:: message Control
//@@
//@@ A control is a signal that the sequence batcher uses to
//@@ communicate with a backend.
//@@
message Control
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the control.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
//@@
//@@ A new sequence is/is-not starting. If true a sequence is
//@@ starting, if false a sequence is continuing. Must
//@@ specify either int32_false_true, fp32_false_true or
//@@ bool_false_true for this control. This control is optional.
//@@
CONTROL_SEQUENCE_START = 0;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
//@@
//@@ A sequence is/is-not ready for inference. If true the
//@@ input tensor data is valid and should be used. If false
//@@ the input tensor data is invalid and inferencing should
//@@ be "skipped". Must specify either int32_false_true,
//@@ fp32_false_true or bool_false_true for this control. This
//@@ control is optional.
//@@
CONTROL_SEQUENCE_READY = 1;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
//@@
//@@ A sequence is/is-not ending. If true a sequence is
//@@ ending, if false a sequence is continuing. Must specify
//@@ either int32_false_true, fp32_false_true or bool_false_true
//@@ for this control. This control is optional.
//@@
CONTROL_SEQUENCE_END = 2;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
//@@
//@@ The correlation ID of the sequence. The correlation ID
//@@ is an uint64_t value that is communicated in whole or
//@@ in part by the tensor. The tensor's datatype must be
//@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64,
//@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
//@@ the correlation ID will be truncated to the low-order 32
//@@ bits. This control is optional.
//@@
CONTROL_SEQUENCE_CORRID = 3;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this control.
//@@
Kind kind = 1;
//@@ .. cpp:var:: int32 int32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in an int32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'int32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated int32 int32_false_true = 2;
//@@ .. cpp:var:: float fp32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a fp32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'fp32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated float fp32_false_true = 3;
//@@ .. cpp:var:: bool bool_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a bool tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'bool_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated bool bool_false_true = 5;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The control's datatype.
//@@
DataType data_type = 4;
}
//@@ .. cpp:var:: message ControlInput
//@@
//@@ The sequence control values to communicate by a model input.
//@@
message ControlInput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model input.
//@@
string name = 1;
//@@ .. cpp:var:: Control control (repeated)
//@@
//@@ The control value(s) that should be communicated to the
//@@ model using this model input.
//@@
repeated Control control = 2;
}
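//
// For illustration, a control_input sketch (the tensor names "START" and
// "READY" are examples; the model must declare matching inputs), used
// inside the sequence_batching section of the model configuration:
//
//   sequence_batching {
//     control_input [
//       {
//         name: "START"
//         control [ { kind: CONTROL_SEQUENCE_START fp32_false_true: [ 0, 1 ] } ]
//       },
//       {
//         name: "READY"
//         control [ { kind: CONTROL_SEQUENCE_READY fp32_false_true: [ 0, 1 ] } ]
//       }
//     ]
//   }
//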
//@@
//@@ .. cpp:var:: message InitialState
//@@
//@@ Settings used to initialize data for implicit state.
//@@
message InitialState
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the state tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof state_data
//@@
//@@ Specify how the initial state data is generated.
//@@
oneof state_data
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as initial state data.
//@@    Note that the value of 'zero_data' will not be checked;
//@@ instead, zero data will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@ .. cpp:var:: string data_file
//@@
//@@ The file whose content will be used as the initial data for
//@@ the state in row-major order. The file must be provided in
//@@ sub-directory 'initial_state' under the model directory.
//@@
string data_file = 4;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the state initialization.
//@@
string name = 5;
}
//@@ .. cpp:var:: message State
//@@
//@@ An input / output pair of tensors that carry state for the sequence.
//@@
message State
{
//@@ .. cpp:var:: string input_name
//@@
//@@ The name of the model state input.
//@@
string input_name = 1;
//@@ .. cpp:var:: string output_name
//@@
//@@ The name of the model state output.
//@@
string output_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 3;
//@@  .. cpp:var:: int64 dims (repeated)
//@@
//@@     The shape of the state tensor, not including the batch dimension.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: InitialState initial_state (repeated)
//@@
//@@ The optional field to specify the initial state for the model.
//@@
repeated InitialState initial_state = 5;
}
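//
// For illustration, an implicit state sketch (tensor names, type and shape
// are examples only) that feeds the previous request's "OUTPUT_STATE" back
// as "INPUT_STATE" and zero-initializes the first request of a sequence:
//
//   sequence_batching {
//     state [
//       {
//         input_name: "INPUT_STATE"
//         output_name: "OUTPUT_STATE"
//         data_type: TYPE_FP32
//         dims: [ 128 ]
//         initial_state {
//           name: "zeros"
//           data_type: TYPE_FP32
//           dims: [ 128 ]
//           zero_data: true
//         }
//       }
//     ]
//   }
//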
//@@ .. cpp:var:: message StrategyDirect
//@@
//@@ The sequence batcher uses a specific, unique batch
//@@ slot for each sequence. All inference requests in a
//@@ sequence are directed to the same batch slot in the same
//@@ model instance over the lifetime of the sequence. This
//@@ is the default strategy.
//@@
message StrategyDirect
{
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the sequence batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 1;
//@@ .. cpp:var:: float minimum_slot_utilization
//@@
//@@ The minimum slot utilization that must be satisfied to
//@@ execute the batch before 'max_queue_delay_microseconds' expires.
//@@ For example, a value of 0.5 indicates that the batch should be
//@@ executed as soon as 50% or more of the slots are ready even if
//@@ the 'max_queue_delay_microseconds' timeout has not expired.
//@@ The default is 0.0, indicating that a batch will be executed
//@@ before 'max_queue_delay_microseconds' timeout expires if at least
//@@ one batch slot is ready. 'max_queue_delay_microseconds' will be
//@@ ignored unless minimum_slot_utilization is set to a non-zero
//@@ value.
//@@
float minimum_slot_utilization = 2;
}
//@@ .. cpp:var:: message StrategyOldest
//@@
//@@ The sequence batcher maintains up to 'max_candidate_sequences'
//@@ candidate sequences. 'max_candidate_sequences' can be greater
//@@ than the model's 'max_batch_size'. For inferencing the batcher
//@@ chooses from the candidate sequences up to 'max_batch_size'
//@@ inference requests. Requests are chosen in an oldest-first
//@@ manner across all candidate sequences. A given sequence is
//@@ not guaranteed to be assigned to the same batch slot for
//@@ all inference requests of that sequence.
//@@
message StrategyOldest
{
//@@ .. cpp:var:: int32 max_candidate_sequences
//@@
//@@ Maximum number of candidate sequences that the batcher
//@@    maintains. Excess sequences are kept in an ordered backlog
//@@ and become candidates when existing candidate sequences
//@@ complete.
//@@
int32 max_candidate_sequences = 1;
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching of candidate
//@@    sequences. If a batch of one of these sizes can be formed,
//@@    it will be executed immediately. If not specified, a
//@@ preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 2;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the dynamic batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 3;
}
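//
// For illustration, an 'oldest' strategy sketch (values are examples only):
//
//   sequence_batching {
//     oldest {
//       max_candidate_sequences: 4
//       preferred_batch_size: [ 4 ]
//       max_queue_delay_microseconds: 100
//     }
//   }
//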
//@@ .. cpp:var:: oneof strategy_choice
//@@
//@@ The strategy used by the sequence batcher. Default strategy
//@@ is 'direct'.
//@@
oneof strategy_choice
{
//@@ .. cpp:var:: StrategyDirect direct
//@@
//@@ StrategyDirect scheduling strategy.
//@@
StrategyDirect direct = 3;
//@@ .. cpp:var:: StrategyOldest oldest
//@@
//@@ StrategyOldest scheduling strategy.
//@@
StrategyOldest oldest = 4;
}
//@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
//@@
//@@ The maximum time, in microseconds, that a sequence is allowed to
//@@ be idle before it is aborted. The inference server considers a
//@@ sequence idle when it does not have any inference request queued
//@@ for the sequence. If this limit is exceeded, the inference server
//@@ will free the sequence slot allocated by the sequence and make it
//@@ available for another sequence. If not specified (or specified as
//@@ zero) a default value of 1000000 (1 second) is used.
//@@
uint64 max_sequence_idle_microseconds = 1;
//@@ .. cpp:var:: ControlInput control_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@  sequence start, end, ready and similar control values to the
//@@ model.
//@@
repeated ControlInput control_input = 2;
//@@ .. cpp:var:: State state (repeated)
//@@
//@@ The optional state that can be stored in Triton for performing
//@@ inference requests on a sequence. Each sequence holds an implicit
//@@ state local to itself. The output state tensor provided by the
//@@ model in 'output_name' field of the current inference request will
//@@ be transferred as an input tensor named 'input_name' in the next
//@@ request of the same sequence. The input state of the first request
//@@ in the sequence contains garbage data.
//@@
repeated State state = 5;
}
//@@
//@@.. cpp:var:: message ModelEnsembling
//@@
//@@ Model ensembling configuration. These settings specify the models that
//@@ compose the ensemble and how data flows between the models.
//@@
message ModelEnsembling
{
//@@ .. cpp:var:: message Step
//@@
//@@ Each step specifies a model included in the ensemble,
//@@ maps ensemble tensor names to the model input tensors,
//@@ and maps model output tensors to ensemble tensor names
//@@
message Step
{
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to execute for this step of the ensemble.
//@@
string model_name = 1;
//@@ .. cpp:var:: int64 model_version
//@@
//@@ The version of the model to use for inference. If -1
//@@ the latest/most-recent version of the model is used.
//@@
int64 model_version = 2;
//@@ .. cpp:var:: map<string,string> input_map
//@@
//@@ Map from name of an input tensor on this step's model to ensemble
//@@ tensor name. The ensemble tensor must have the same data type and
//@@ shape as the model input. Each model input must be assigned to
//@@ one ensemble tensor, but the same ensemble tensor can be assigned
//@@ to multiple model inputs.
//@@
map<string, string> input_map = 3;
//@@ .. cpp:var:: map<string,string> output_map
//@@
//@@ Map from name of an output tensor on this step's model to ensemble
//@@ tensor name. The data type and shape of the ensemble tensor will
//@@ be inferred from the model output. It is optional to assign all
//@@ model outputs to ensemble tensors. One ensemble tensor name
//@@ can appear in an output map only once.
//@@
map<string, string> output_map = 4;
}
//@@ .. cpp:var:: Step step (repeated)
//@@
//@@ The models and the input / output mappings used within the ensemble.
//@@
repeated Step step = 1;
}
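//
// For illustration, an ensemble sketch (model and tensor names are examples
// only) that routes an ensemble input through a preprocessing model and
// then a classifier:
//
//   ensemble_scheduling {
//     step [
//       {
//         model_name: "preprocess"
//         model_version: -1
//         input_map { key: "RAW" value: "ENSEMBLE_INPUT" }
//         output_map { key: "PREPROCESSED" value: "preprocessed_tensor" }
//       },
//       {
//         model_name: "classifier"
//         model_version: -1
//         input_map { key: "INPUT" value: "preprocessed_tensor" }
//         output_map { key: "PROB" value: "ENSEMBLE_OUTPUT" }
//       }
//     ]
//   }
//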
//@@
//@@.. cpp:var:: message ModelParameter
//@@
//@@ A model parameter.
//@@
message ModelParameter
{
//@@ .. cpp:var:: string string_value
//@@
//@@ The string value of the parameter.
//@@
string string_value = 1;
}
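//
// For illustration, a parameters entry sketch (key and value are examples
// only), as used from the 'parameters' map of ModelConfig below:
//
//   parameters { key: "custom_key" value: { string_value: "custom_value" } }
//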
//@@
//@@.. cpp:var:: message ModelWarmup
//@@
//@@ Settings used to construct the request sample for model warmup.
//@@
message ModelWarmup
{
//@@
//@@ .. cpp:var:: message Input
//@@
//@@ Meta data associated with an input.
//@@
message Input
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the input tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof input_data_type
//@@
//@@ Specify how the input data is generated. If the input has STRING
//@@ data type and 'random_data' is set, the data generation will fall
//@@ back to 'zero_data'.
//@@
oneof input_data_type
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as input data. Note that the
//@@    value of 'zero_data' will not be checked; instead, zero data
//@@ will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@
//@@ .. cpp:var:: bool random_data
//@@
//@@ The identifier for using random data as input data. Note that
//@@    the value of 'random_data' will not be checked; instead,
//@@ random data will be used as long as the field is set.
//@@
bool random_data = 4;
//@@ .. cpp:var:: string input_data_file
//@@
//@@ The file whose content will be used as raw input data in
//@@ row-major order. The file must be provided in a sub-directory
//@@ 'warmup' under the model directory. The file contents should be
//@@ in binary format. For TYPE_STRING data-type, an element is
//@@ represented by a 4-byte unsigned integer giving the length
//@@ followed by the actual bytes.
//@@
string input_data_file = 5;
}
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the request sample.
//@@
string name = 1;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the inference request. This must be >= 1. For
//@@ models that don't support batching, batch_size must be 1. If
//@@ batch_size > 1, the 'inputs' specified below will be duplicated to
//@@ match the batch size requested.
//@@
uint32 batch_size = 2;
//@@ .. cpp:var:: map<string, Input> inputs
//@@
//@@ The warmup meta data associated with every model input, including
//@@ control tensors.
//@@
map<string, Input> inputs = 3;
//@@ .. cpp:var:: uint32 count
//@@
//@@  The number of times this warmup sample will be executed.
//@@  For example, if this field is set to 2, 2 model executions using this
//@@  sample will be scheduled for warmup. Default value is 0 which
//@@  indicates that this sample will be used only once.
//@@  Note that for sequence models, 'count' may not work well
//@@  because the model often expects a valid sequence of requests which
//@@  should be represented by a series of warmup samples. 'count > 1'
//@@  essentially "resends" one of the samples, which may invalidate the
//@@  sequence and result in unexpected warmup failures.
//@@
uint32 count = 4;
}
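//
// For illustration, a warmup sample sketch (input name, type and shape are
// examples only) that schedules one batch-1 request with zero-filled data:
//
//   model_warmup [
//     {
//       name: "zero_warmup"
//       batch_size: 1
//       inputs {
//         key: "INPUT0"
//         value: { data_type: TYPE_FP32 dims: [ 16 ] zero_data: true }
//       }
//     }
//   ]
//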
//@@
//@@ .. cpp:var:: message ModelOperations
//@@
//@@ The metadata of libraries providing custom operations for this model.
//@@
message ModelOperations
{
//@@ .. cpp:var:: string op_library_filename (repeated)
//@@
//@@ Optional paths of the libraries providing custom operations for
//@@ this model. Valid only for ONNX models.
//@@
repeated string op_library_filename = 1;
}
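//
// For illustration, a model_operations sketch (the library path is an
// example only):
//
//   model_operations { op_library_filename: [ "custom_ops/libmyops.so" ] }
//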
//@@
//@@ .. cpp:var:: message ModelTransactionPolicy
//@@
//@@ The specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
message ModelTransactionPolicy
{
//@@ .. cpp:var:: bool decoupled
//@@
//@@  Indicates whether responses generated by the model are decoupled from
//@@  the requests issued to it, which means the number of responses
//@@  generated by the model may differ from the number of requests issued, and
//@@ that the responses may be out of order relative to the order of
//@@ requests. The default is false, which means the model will generate
//@@ exactly one response for each request.
//@@
bool decoupled = 1;
}
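//
// For illustration, the transaction policy for a model that may return any
// number of responses per request:
//
//   model_transaction_policy { decoupled: true }
//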
//@@
//@@.. cpp:var:: message ModelRepositoryAgents
//@@
//@@ The repository agents for the model.
//@@
message ModelRepositoryAgents
{
//@@
//@@ .. cpp:var:: message Agent
//@@
//@@ A repository agent that should be invoked for the specified
//@@ repository actions for this model.
//@@
message Agent
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the agent.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ The parameters for the agent.
//@@
map<string, string> parameters = 2;
}
//@@
//@@ .. cpp:var:: Agent agents (repeated)
//@@
//@@ The ordered list of agents for the model. These agents will be
//@@  invoked in order to respond to repository actions occurring for the
//@@ model.
//@@
repeated Agent agents = 1;
}
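//
// For illustration, a repository agents sketch (the agent name and
// parameter are examples only):
//
//   model_repository_agents {
//     agents [
//       { name: "my_agent" parameters { key: "option" value: "on" } }
//     ]
//   }
//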
//@@
//@@.. cpp:var:: message ModelResponseCache
//@@
//@@ The response cache setting for the model.
//@@
message ModelResponseCache
{
//@@
//@@  .. cpp:var:: bool enable
//@@
//@@  Whether or not to use the response cache for the model. If true, the
//@@  responses from the model are cached, and when an identical request
//@@  is encountered the cached response is returned instead of running
//@@  the model execution. By default, the response cache is disabled
//@@  for all models.
//@@
bool enable = 1;
}
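//
// For illustration, enabling the response cache for a model:
//
//   response_cache { enable: true }
//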
//@@
//@@.. cpp:var:: message ModelConfig
//@@
//@@ A model configuration.
//@@
message ModelConfig
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string platform
//@@
//@@ The framework for the model. Possible values are
//@@ "tensorrt_plan", "tensorflow_graphdef",
//@@ "tensorflow_savedmodel", "onnxruntime_onnx",
//@@ "pytorch_libtorch".
//@@
string platform = 2;
//@@ .. cpp:var:: string backend
//@@
//@@ The backend used by the model.
//@@
string backend = 17;
//@@ .. cpp:var:: ModelVersionPolicy version_policy
//@@
//@@ Policy indicating which version(s) of the model will be served.
//@@
ModelVersionPolicy version_policy = 3;
//@@ .. cpp:var:: int32 max_batch_size
//@@
//@@ Maximum batch size allowed for inference. This can only decrease
//@@ what is allowed by the model itself. A max_batch_size value of 0
//@@ indicates that batching is not allowed for the model and the
//@@ dimension/shape of the input and output tensors must exactly
//@@ match what is specified in the input and output configuration. A
//@@ max_batch_size value > 0 indicates that batching is allowed and
//@@ so the model expects the input tensors to have an additional
//@@ initial dimension for the batching that is not specified in the
//@@ input (for example, if the model supports batched inputs of
//@@ 2-dimensional tensors then the model configuration will specify
//@@ the input shape as [ X, Y ] but the model will expect the actual
//@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
//@@ returned outputs will also have an additional initial dimension
//@@ for the batch.
//@@
int32 max_batch_size = 4;
//@@ .. cpp:var:: ModelInput input (repeated)
//@@
//@@  The inputs requested by the model.
//@@
repeated ModelInput input = 5;
//@@ .. cpp:var:: ModelOutput output (repeated)
//@@
//@@ The outputs produced by the model.
//@@
repeated ModelOutput output = 6;
//@@ .. cpp:var:: BatchInput batch_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ batch related values to the model.
//@@
repeated BatchInput batch_input = 20;
//@@ .. cpp:var:: BatchOutput batch_output (repeated)
//@@
//@@  The outputs produced by the model that require special handling
//@@ by the model backend.
//@@
repeated BatchOutput batch_output = 21;
//@@ .. cpp:var:: ModelOptimizationPolicy optimization
//@@
//@@ Optimization configuration for the model. If not specified
//@@ then default optimization policy is used.
//@@
ModelOptimizationPolicy optimization = 12;
//@@ .. cpp:var:: oneof scheduling_choice
//@@
//@@ The scheduling policy for the model. If not specified the
//@@ default scheduling policy is used for the model. The default
//@@ policy is to execute each inference request independently.
//@@
oneof scheduling_choice
{
//@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
//@@
//@@ If specified, enables the dynamic-batching scheduling
//@@ policy. With dynamic-batching the scheduler may group
//@@ together independent requests into a single batch to
//@@ improve inference throughput.
//@@
ModelDynamicBatching dynamic_batching = 11;
//@@ .. cpp:var:: ModelSequenceBatching sequence_batching
//@@
//@@ If specified, enables the sequence-batching scheduling
//@@ policy. With sequence-batching, inference requests
//@@ with the same correlation ID are routed to the same
//@@ model instance. Multiple sequences of inference requests
//@@ may be batched together into a single batch to
//@@ improve inference throughput.
//@@
ModelSequenceBatching sequence_batching = 13;
//@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
//@@
//@@ If specified, enables the model-ensembling scheduling
//@@ policy. With model-ensembling, inference requests
//@@ will be processed according to the specification, such as an
//@@ execution sequence of models. The input specified in this model
//@@ config will be the input for the ensemble, and the output
//@@ specified will be the output of the ensemble.
//@@
ModelEnsembling ensemble_scheduling = 15;
}
//@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
//@@
//@@ Instances of this model. If not specified, one instance
//@@ of the model will be instantiated on each available GPU.
//@@
repeated ModelInstanceGroup instance_group = 7;
//@@ .. cpp:var:: string default_model_filename
//@@
//@@ Optional filename of the model file to use if a
//@@ compute-capability specific model is not specified in
//@@ :cpp:var:`cc_model_filenames`. If not specified the default name
//@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
//@@ 'model.pt' depending on the model type.
//@@
string default_model_filename = 8;
//@@ .. cpp:var:: map<string,string> cc_model_filenames
//@@
//@@ Optional map from CUDA compute capability to the filename of
//@@ the model that supports that compute capability. The filename
//@@ refers to a file within the model version directory.
//@@
map<string, string> cc_model_filenames = 9;
//@@ .. cpp:var:: map<string,string> metric_tags
//@@
//@@ Optional metric tags. User-specific key-value pairs for metrics
//@@ reported for this model. These tags are applied to the metrics
//@@ reported on the HTTP metrics port.
//@@
map<string, string> metric_tags = 10;
//@@ .. cpp:var:: map<string,ModelParameter> parameters
//@@
//@@ Optional model parameters. User-specified parameter values.
//@@
map<string, ModelParameter> parameters = 14;
//@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
//@@
//@@ Warmup setting of this model. If specified, all instances
//@@ will be run with the request samples in sequence before
//@@ serving the model.
//@@ This field can only be specified if the model is not an ensemble
//@@ model.
//@@
repeated ModelWarmup model_warmup = 16;
//@@ .. cpp:var:: ModelOperations model_operations
//@@
//@@ Optional metadata of the libraries providing custom operations for
//@@ this model.
//@@
ModelOperations model_operations = 18;
//@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
//@@
//@@ Optional specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
ModelTransactionPolicy model_transaction_policy = 19;
//@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
//@@
//@@ Optional specification of the agent(s) that should be invoked
//@@  when repository actions are performed for this model.
//@@
ModelRepositoryAgents model_repository_agents = 23;
//@@ .. cpp:var:: ModelResponseCache response_cache
//@@
//@@ Optional setting for utilizing the response cache for this
//@@ model.
//@@
ModelResponseCache response_cache = 24;
}
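//
// For illustration, a minimal complete config.pbtxt sketch (all names,
// shapes and counts are examples only) tying the fields above together:
//
//   name: "my_model"
//   platform: "tensorrt_plan"
//   max_batch_size: 8
//   input [ { name: "INPUT0" data_type: TYPE_FP32 dims: [ 16 ] } ]
//   output [ { name: "OUTPUT0" data_type: TYPE_FP32 dims: [ 16 ] } ]
//   instance_group [ { count: 2 kind: KIND_GPU } ]
//   dynamic_batching { preferred_batch_size: [ 4, 8 ] }
//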
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/async_work_queue.h"
namespace triton { namespace common {
AsyncWorkQueue::~AsyncWorkQueue()
{
GetSingleton()->thread_pool_.reset();
}
AsyncWorkQueue*
AsyncWorkQueue::GetSingleton()
{
static AsyncWorkQueue singleton;
return &singleton;
}
Error
AsyncWorkQueue::Initialize(size_t worker_count)
{
if (worker_count < 1) {
return Error(
Error::Code::INVALID_ARG,
"Async work queue must be initialized with positive 'worker_count'");
}
static std::mutex init_mtx;
std::lock_guard<std::mutex> lk(init_mtx);
if (GetSingleton()->thread_pool_) {
return Error(
Error::Code::ALREADY_EXISTS,
"Async work queue has been initialized with " +
std::to_string(GetSingleton()->thread_pool_->Size()) +
" 'worker_count'");
}
GetSingleton()->thread_pool_.reset(new ThreadPool(worker_count));
return Error::Success;
}
size_t
AsyncWorkQueue::WorkerCount()
{
if (!GetSingleton()->thread_pool_) {
return 0;
}
return GetSingleton()->thread_pool_->Size();
}
Error
AsyncWorkQueue::AddTask(std::function<void(void)>&& task)
{
if (!GetSingleton()->thread_pool_) {
return Error(
Error::Code::UNAVAILABLE,
"Async work queue must be initialized before adding task");
}
GetSingleton()->thread_pool_->Enqueue(std::move(task));
return Error::Success;
}
void
AsyncWorkQueue::Reset()
{
// Reconstruct the singleton to reset it
GetSingleton()->~AsyncWorkQueue();
new (GetSingleton()) AsyncWorkQueue();
}
}} // namespace triton::common
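// Usage sketch (illustrative only): initialize the shared queue once at
// startup, then enqueue work from anywhere in the process:
//
//   triton::common::AsyncWorkQueue::Initialize(4 /* worker_count */);
//   triton::common::AsyncWorkQueue::AddTask([] { /* do some work */ });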
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/error.h"
namespace triton { namespace common {
const Error Error::Success(Error::Code::SUCCESS);
std::string
Error::AsString() const
{
std::string str(CodeString(code_));
str += ": " + msg_;
return str;
}
const char*
Error::CodeString(const Code code)
{
switch (code) {
case Error::Code::SUCCESS:
return "OK";
case Error::Code::UNKNOWN:
return "Unknown";
case Error::Code::INTERNAL:
return "Internal";
case Error::Code::NOT_FOUND:
return "Not found";
case Error::Code::INVALID_ARG:
return "Invalid argument";
case Error::Code::UNAVAILABLE:
return "Unavailable";
case Error::Code::UNSUPPORTED:
return "Unsupported";
case Error::Code::ALREADY_EXISTS:
return "Already exists";
default:
break;
}
return "<invalid code>";
}
}} // namespace triton::common
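// Usage sketch (illustrative only): construct an error and render it as a
// string, as done by callers such as AsyncWorkQueue above:
//
//   triton::common::Error err(
//       triton::common::Error::Code::INVALID_ARG, "bad dims");
//   std::cerr << err.AsString() << std::endl;  // "Invalid argument: bad dims"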
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/logging.h"
#ifdef _WIN32
// suppress the min and max definitions in Windef.h.
#define NOMINMAX
#include <Windows.h>
#else
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <iomanip>
#include <iostream>
namespace triton { namespace common {
Logger gLogger_;
Logger::Logger()
: enables_{true, true, true}, vlevel_(0), format_(Format::kDEFAULT)
{
}
void
Logger::Log(const std::string& msg)
{
const std::lock_guard<std::mutex> lock(mutex_);
if (file_stream_.is_open()) {
file_stream_ << msg << std::endl;
} else {
std::cerr << msg << std::endl;
}
}
void
Logger::Flush()
{
std::cerr << std::flush;
}
const std::vector<char> LogMessage::level_name_{'E', 'W', 'I'};
LogMessage::LogMessage(const char* file, int line, uint32_t level)
{
std::string path(file);
size_t pos = path.rfind('/');
if (pos != std::string::npos) {
path = path.substr(pos + 1, std::string::npos);
}
// 'L' below is a placeholder for the log level
switch (gLogger_.LogFormat()) {
case Logger::Format::kDEFAULT: {
// LMMDD hh:mm:ss.ssssss
#ifdef _WIN32
SYSTEMTIME system_time;
GetSystemTime(&system_time);
stream_ << level_name_[std::min(level, (uint32_t)Level::kINFO)]
<< std::setfill('0') << std::setw(2) << system_time.wMonth
<< std::setw(2) << system_time.wDay << ' ' << std::setw(2)
<< system_time.wHour << ':' << std::setw(2) << system_time.wMinute
<< ':' << std::setw(2) << system_time.wSecond << '.'
<< std::setw(6) << system_time.wMilliseconds * 1000 << ' '
<< static_cast<uint32_t>(GetCurrentProcessId()) << ' ' << path
<< ':' << line << "] ";
#else
struct timeval tv;
gettimeofday(&tv, NULL);
struct tm tm_time;
gmtime_r(((time_t*)&(tv.tv_sec)), &tm_time);
stream_ << level_name_[std::min(level, (uint32_t)Level::kINFO)]
<< std::setfill('0') << std::setw(2) << (tm_time.tm_mon + 1)
<< std::setw(2) << tm_time.tm_mday << ' ' << std::setw(2)
<< tm_time.tm_hour << ':' << std::setw(2) << tm_time.tm_min << ':'
<< std::setw(2) << tm_time.tm_sec << '.' << std::setw(6)
<< tv.tv_usec << ' ' << static_cast<uint32_t>(getpid()) << ' '
<< path << ':' << line << "] ";
#endif
break;
}
case Logger::Format::kISO8601: {
// YYYY-MM-DDThh:mm:ssZ L
#ifdef _WIN32
SYSTEMTIME system_time;
GetSystemTime(&system_time);
stream_ << system_time.wYear << '-' << std::setfill('0') << std::setw(2)
<< system_time.wMonth << '-' << std::setw(2) << system_time.wDay
<< 'T' << std::setw(2) << system_time.wHour << ':' << std::setw(2)
<< system_time.wMinute << ':' << std::setw(2)
<< system_time.wSecond << "Z "
<< level_name_[std::min(level, (uint32_t)Level::kINFO)] << ' '
<< static_cast<uint32_t>(GetCurrentProcessId()) << ' ' << path
<< ':' << line << "] ";
#else
struct timeval tv;
gettimeofday(&tv, NULL);
struct tm tm_time;
gmtime_r(((time_t*)&(tv.tv_sec)), &tm_time);
stream_ << (tm_time.tm_year + 1900) << '-' << std::setfill('0')
<< std::setw(2) << (tm_time.tm_mon + 1) << '-' << std::setw(2)
<< tm_time.tm_mday << 'T' << std::setw(2) << tm_time.tm_hour
<< ':' << std::setw(2) << tm_time.tm_min << ':' << std::setw(2)
<< tm_time.tm_sec << "Z "
<< level_name_[std::min(level, (uint32_t)Level::kINFO)] << ' '
<< static_cast<uint32_t>(getpid()) << ' ' << path << ':' << line
<< "] ";
#endif
break;
}
}
}
LogMessage::~LogMessage()
{
gLogger_.Log(stream_.str());
}
}} // namespace triton::common
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/model_config.h"
namespace triton { namespace common {
bool
IsFixedSizeDataType(const inference::DataType dtype)
{
return dtype != inference::DataType::TYPE_STRING;
}
size_t
GetDataTypeByteSize(const inference::DataType dtype)
{
switch (dtype) {
case inference::DataType::TYPE_BOOL:
return 1;
case inference::DataType::TYPE_UINT8:
return 1;
case inference::DataType::TYPE_UINT16:
return 2;
case inference::DataType::TYPE_UINT32:
return 4;
case inference::DataType::TYPE_UINT64:
return 8;
case inference::DataType::TYPE_INT8:
return 1;
case inference::DataType::TYPE_INT16:
return 2;
case inference::DataType::TYPE_INT32:
return 4;
case inference::DataType::TYPE_INT64:
return 8;
case inference::DataType::TYPE_FP16:
return 2;
case inference::DataType::TYPE_FP32:
return 4;
case inference::DataType::TYPE_FP64:
return 8;
case inference::DataType::TYPE_STRING:
return 0;
case inference::DataType::TYPE_BF16:
return 2;
default:
break;
}
return 0;
}
int64_t
GetElementCount(const DimsList& dims)
{
bool first = true;
int64_t cnt = 0;
for (auto dim : dims) {
if (dim == WILDCARD_DIM) {
return -1;
}
if (first) {
cnt = dim;
first = false;
} else {
cnt *= dim;
}
}
return cnt;
}
int64_t
GetElementCount(const std::vector<int64_t>& dims)
{
bool first = true;
int64_t cnt = 0;
for (auto dim : dims) {
if (dim == WILDCARD_DIM) {
return -1;
}
if (first) {
cnt = dim;
first = false;
} else {
cnt *= dim;
}
}
return cnt;
}
int64_t
GetElementCount(const inference::ModelInput& mio)
{
return GetElementCount(mio.dims());
}
int64_t
GetElementCount(const inference::ModelOutput& mio)
{
return GetElementCount(mio.dims());
}
int64_t
GetByteSize(const inference::DataType& dtype, const DimsList& dims)
{
size_t dt_size = GetDataTypeByteSize(dtype);
if (dt_size == 0) {
return -1;
}
int64_t cnt = GetElementCount(dims);
if (cnt == -1) {
return -1;
}
return cnt * dt_size;
}
int64_t
GetByteSize(const inference::DataType& dtype, const std::vector<int64_t>& dims)
{
size_t dt_size = GetDataTypeByteSize(dtype);
if (dt_size == 0) {
return -1;
}
int64_t cnt = GetElementCount(dims);
if (cnt == -1) {
return -1;
}
return cnt * dt_size;
}
int64_t
GetByteSize(
const int batch_size, const inference::DataType& dtype,
const DimsList& dims)
{
if (dims.size() == 0) {
return batch_size * GetDataTypeByteSize(dtype);
}
int64_t bs = GetByteSize(dtype, dims);
if (bs == -1) {
return -1;
}
return std::max(1, batch_size) * bs;
}
int64_t
GetByteSize(
const int batch_size, const inference::DataType& dtype,
const std::vector<int64_t>& dims)
{
if (dims.size() == 0) {
return batch_size * GetDataTypeByteSize(dtype);
}
int64_t bs = GetByteSize(dtype, dims);
if (bs == -1) {
return -1;
}
return std::max(1, batch_size) * bs;
}
int64_t
GetByteSize(const inference::ModelInput& mio)
{
return GetByteSize(mio.data_type(), mio.dims());
}
int64_t
GetByteSize(const inference::ModelOutput& mio)
{
return GetByteSize(mio.data_type(), mio.dims());
}
int
GetCpuNiceLevel(const inference::ModelConfig& config)
{
int nice = SCHEDULER_DEFAULT_NICE;
if (config.has_optimization()) {
switch (config.optimization().priority()) {
case inference::ModelOptimizationPolicy::PRIORITY_MAX:
nice = 0;
break;
case inference::ModelOptimizationPolicy::PRIORITY_MIN:
nice = 19;
break;
default:
nice = SCHEDULER_DEFAULT_NICE;
break;
}
}
return nice;
}
bool
CompareDims(const DimsList& dims0, const DimsList& dims1)
{
if (dims0.size() != dims1.size()) {
return false;
}
for (int i = 0; i < dims0.size(); ++i) {
if (dims0[i] != dims1[i]) {
return false;
}
}
return true;
}
bool
CompareDims(
const std::vector<int64_t>& dims0, const std::vector<int64_t>& dims1)
{
if (dims0.size() != dims1.size()) {
return false;
}
for (size_t i = 0; i < dims0.size(); ++i) {
if (dims0[i] != dims1[i]) {
return false;
}
}
return true;
}
bool
CompareDimsWithWildcard(const DimsList& dims0, const DimsList& dims1)
{
if (dims0.size() != dims1.size()) {
return false;
}
for (int i = 0; i < dims0.size(); ++i) {
if ((dims0[i] != WILDCARD_DIM) && (dims1[i] != WILDCARD_DIM) &&
(dims0[i] != dims1[i])) {
return false;
}
}
return true;
}
bool
CompareDimsWithWildcard(
const DimsList& dims0, const std::vector<int64_t>& dims1)
{
if (dims0.size() != (int64_t)dims1.size()) {
return false;
}
for (int i = 0; i < dims0.size(); ++i) {
if ((dims0[i] != WILDCARD_DIM) && (dims1[i] != WILDCARD_DIM) &&
(dims0[i] != dims1[i])) {
return false;
}
}
return true;
}
std::string
DimsListToString(const DimsList& dims)
{
bool first = true;
std::string str("[");
for (const auto& dim : dims) {
if (!first) {
str += ",";
}
str += std::to_string(dim);
first = false;
}
str += "]";
return str;
}
std::string
DimsListToString(const std::vector<int64_t>& dims, const int start_idx)
{
int idx = 0;
std::string str("[");
for (const auto& dim : dims) {
if (idx >= start_idx) {
if (idx > start_idx) {
str += ",";
}
str += std::to_string(dim);
}
idx++;
}
str += "]";
return str;
}
const char*
DataTypeToProtocolString(const inference::DataType dtype)
{
switch (dtype) {
case inference::DataType::TYPE_BOOL:
return "BOOL";
case inference::DataType::TYPE_UINT8:
return "UINT8";
case inference::DataType::TYPE_UINT16:
return "UINT16";
case inference::DataType::TYPE_UINT32:
return "UINT32";
case inference::DataType::TYPE_UINT64:
return "UINT64";
case inference::DataType::TYPE_INT8:
return "INT8";
case inference::DataType::TYPE_INT16:
return "INT16";
case inference::DataType::TYPE_INT32:
return "INT32";
case inference::DataType::TYPE_INT64:
return "INT64";
case inference::DataType::TYPE_FP16:
return "FP16";
case inference::DataType::TYPE_FP32:
return "FP32";
case inference::DataType::TYPE_FP64:
return "FP64";
case inference::DataType::TYPE_STRING:
return "BYTES";
case inference::DataType::TYPE_BF16:
return "BF16";
default:
break;
}
return "<invalid>";
}
inference::DataType
ProtocolStringToDataType(const std::string& dtype)
{
return ProtocolStringToDataType(dtype.c_str(), dtype.size());
}
inference::DataType
ProtocolStringToDataType(const char* dtype, size_t len)
{
if (len < 4 || len > 6) {
return inference::DataType::TYPE_INVALID;
}
if ((*dtype == 'I') && (len != 6)) {
if ((dtype[1] == 'N') && (dtype[2] == 'T')) {
if ((dtype[3] == '8') && (len == 4)) {
return inference::DataType::TYPE_INT8;
} else if ((dtype[3] == '1') && (dtype[4] == '6')) {
return inference::DataType::TYPE_INT16;
} else if ((dtype[3] == '3') && (dtype[4] == '2')) {
return inference::DataType::TYPE_INT32;
} else if ((dtype[3] == '6') && (dtype[4] == '4')) {
return inference::DataType::TYPE_INT64;
}
}
} else if ((*dtype == 'U') && (len != 4)) {
if ((dtype[1] == 'I') && (dtype[2] == 'N') && (dtype[3] == 'T')) {
if ((dtype[4] == '8') && (len == 5)) {
return inference::DataType::TYPE_UINT8;
} else if ((dtype[4] == '1') && (dtype[5] == '6')) {
return inference::DataType::TYPE_UINT16;
} else if ((dtype[4] == '3') && (dtype[5] == '2')) {
return inference::DataType::TYPE_UINT32;
} else if ((dtype[4] == '6') && (dtype[5] == '4')) {
return inference::DataType::TYPE_UINT64;
}
}
} else if ((*dtype == 'F') && (dtype[1] == 'P') && (len == 4)) {
if ((dtype[2] == '1') && (dtype[3] == '6')) {
return inference::DataType::TYPE_FP16;
} else if ((dtype[2] == '3') && (dtype[3] == '2')) {
return inference::DataType::TYPE_FP32;
} else if ((dtype[2] == '6') && (dtype[3] == '4')) {
return inference::DataType::TYPE_FP64;
}
} else if (*dtype == 'B') {
switch (dtype[1]) {
case 'Y':
if (!strcmp(dtype + 2, "TES")) {
return inference::DataType::TYPE_STRING;
}
break;
case 'O':
if (!strcmp(dtype + 2, "OL")) {
return inference::DataType::TYPE_BOOL;
}
break;
case 'F':
if (!strcmp(dtype + 2, "16")) {
return inference::DataType::TYPE_BF16;
}
break;
}
}
return inference::DataType::TYPE_INVALID;
}
}} // namespace triton::common
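// Usage sketch (illustrative only): byte size of a batch of FP32 tensors.
// With dims {3, 224, 224} the element count is 150528, a single tensor is
// 602112 bytes, and a batch of 4 is 2408448 bytes:
//
//   std::vector<int64_t> dims{3, 224, 224};
//   int64_t bytes = triton::common::GetByteSize(
//       4 /* batch_size */, inference::DataType::TYPE_FP32, dims);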
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/table_printer.h"
#ifdef _WIN32
// suppress the min and max definitions in Windef.h.
#define NOMINMAX
#include <Windows.h>
#else
#include <sys/ioctl.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
namespace triton { namespace common {
//
// ASCII table printer.
//
void
TablePrinter::InsertRow(const std::vector<std::string>& row)
{
std::vector<std::vector<std::string>> table_row;
// Number of lines in each field in the record
size_t max_height = 0;
// Update max length of data items in each row
for (size_t i = 0; i < row.size(); ++i) {
table_row.push_back(std::vector<std::string>{});
std::stringstream ss(row[i]);
std::string line;
size_t max_width = 0;
while (std::getline(ss, line, '\n')) {
table_row[i].push_back(line);
if (line.size() > max_width)
max_width = line.size();
}
if (max_width > max_widths_[i])
max_widths_[i] = max_width;
size_t number_of_lines = table_row[i].size();
if (max_height < number_of_lines)
max_height = number_of_lines;
}
max_heights_.push_back(max_height);
data_.emplace_back(table_row);
}
void
TablePrinter::FairShare()
{
// initialize original index locations
size_t array_size = max_widths_.size();
std::vector<size_t> idx(array_size);
iota(idx.begin(), idx.end(), 0);
stable_sort(idx.begin(), idx.end(), [this](size_t i1, size_t i2) {
return this->max_widths_[i1] < this->max_widths_[i2];
});
size_t loop_index = 1;
for (auto itr = idx.begin(); itr != idx.end(); ++itr) {
// If a column is not using all the space allocated to it
if (max_widths_[*itr] < shares_[*itr]) {
float excess = shares_[*itr] - max_widths_[*itr];
shares_[*itr] -= excess;
if (itr == idx.end() - 1)
break;
auto update_itr = idx.begin() + (itr - idx.begin() + 1);
// excess amount of unused space that must be distributed evenly to the
// next columns
float excess_per_column = excess / (array_size - loop_index);
for (; update_itr != idx.end(); ++update_itr) {
shares_[*update_itr] += excess_per_column;
excess -= excess_per_column;
}
}
++loop_index;
}
// Remove any decimal shares
for (auto itr = idx.begin(); itr != idx.end(); ++itr) {
shares_[*itr] = (size_t)shares_[*itr];
}
// For each record
for (size_t i = 0; i < data_.size(); i++) {
auto current_row = data_[i];
// For each field in the record
for (size_t j = 0; j < current_row.size(); j++) {
// For each line in the record
for (size_t line_index = 0; line_index < current_row[j].size();
line_index++) {
std::string line = current_row[j][line_index];
size_t num_rows = (line.size() + shares_[j] - 1) / shares_[j];
// If the number of rows required for this record is larger than 1, we
// will break that line and put it in multiple lines
if (num_rows > 1) {
// Remove the multi-line field; it will be replaced by lines
// that fit within the column size
data_[i][j].erase(data_[i][j].begin() + line_index);
for (size_t k = 0; k < num_rows; k++) {
size_t start_index =
std::min((size_t)(k * shares_[j]), line.size());
size_t end_index =
std::min((size_t)((k + 1) * shares_[j]), line.size());
data_[i][j].insert(
data_[i][j].begin() + line_index + k,
line.substr(start_index, end_index - start_index));
}
// We need to advance the index past the split lines.
line_index += num_rows - 1;
}
if (max_heights_[i] < (num_rows - 1 + current_row[j].size()))
max_heights_[i] += num_rows - 1;
}
}
}
}
void
TablePrinter::AddRow(std::stringstream& table, size_t row_index)
{
auto row = data_[row_index];
size_t max_height = max_heights_[row_index];
for (size_t j = 0; j < max_height; j++) {
table << "|" << std::left;
for (size_t i = 0; i < row.size(); i++) {
if (j < row[i].size())
table << " " << std::setw(shares_[i]) << row[i][j] << " |";
else
table << " " << std::setw(shares_[i]) << " "
<< " |";
}
// Do not add new line if this is the last row of this record
if (j != max_height - 1)
table << "\n";
}
table << "\n";
}
void
TablePrinter::AddRowDivider(std::stringstream& table)
{
table << "+";
for (const auto& share : shares_) {
for (size_t i = 0; i < share + 2; i++) table << "-";
table << "+";
}
table << "\n";
}
std::string
TablePrinter::PrintTable()
{
std::stringstream table;
table << "\n";
FairShare();
AddRowDivider(table);
// Add table headers
AddRow(table, 0);
AddRowDivider(table);
for (size_t j = 1; j < data_.size(); j++) {
AddRow(table, j);
}
AddRowDivider(table);
return table.str();
}
// TablePrinter copies `headers` into the first row of the table.
TablePrinter::TablePrinter(const std::vector<std::string>& headers)
{
// Terminal width in columns; fall back to 500 if it cannot be determined
size_t column_size = 500;
#ifdef _WIN32
CONSOLE_SCREEN_BUFFER_INFO csbi;
int ret = GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
if (ret && (csbi.dwSize.X != 0)) {
column_size = csbi.dwSize.X;
}
#else
struct winsize terminal_size;
int status = ioctl(STDOUT_FILENO, TIOCGWINSZ, &terminal_size);
if ((status == 0) && (terminal_size.ws_col != 0)) {
column_size = terminal_size.ws_col;
}
#endif
for (size_t i = 0; i < headers.size(); ++i) {
max_widths_.emplace_back(0);
}
// Calculate fair share of every column
size_t number_of_columns = headers.size();
// Usable width is the terminal width minus the two spaces of padding
// around each column and the (number_of_columns + 1) '|' separators
// between and around the columns
size_t terminal_width =
column_size - (2 * number_of_columns) - (number_of_columns + 1);
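// For example (illustrative numbers): with 3 columns on an 80-column
// terminal, the usable width is 80 - (2 * 3) - (3 + 1) = 70 characters.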
int equal_share = terminal_width / headers.size();
for (size_t i = 0; i < headers.size(); ++i) {
shares_.emplace_back(equal_share);
terminal_width -= equal_share;
}
InsertRow(headers);
}
}} // namespace triton::common
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/thread_pool.h"
#include <stdexcept>
namespace triton { namespace common {
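// Usage sketch (Task is assumed to be a std::function<void()>-style callable
// declared in thread_pool.h):
//   ThreadPool pool(4);                      // start 4 worker threads
//   pool.Enqueue([] { /* do some work */ }); // run a task on a worker
//   // ~ThreadPool() drains the remaining tasks and joins all workers.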
ThreadPool::ThreadPool(size_t thread_count)
{
if (!thread_count) {
throw std::invalid_argument("Thread count must be greater than zero.");
}
// Worker loop: each thread waits for a task to become available and runs it
const auto worker_loop = [this]() {
while (true) {
Task task;
{
std::unique_lock<std::mutex> lk(queue_mtx_);
// Wake if there's a task to do, or the pool has been stopped.
cv_.wait(lk, [&]() { return !task_queue_.empty() || stop_; });
// Exit condition
if (stop_ && task_queue_.empty()) {
break;
}
task = std::move(task_queue_.front());
task_queue_.pop();
}
// Execute task - ensure function has a valid target
if (task) {
task();
}
}
};
workers_.reserve(thread_count);
for (size_t i = 0; i < thread_count; ++i) {
workers_.emplace_back(worker_loop);
}
}
ThreadPool::~ThreadPool()
{
{
std::lock_guard<std::mutex> lk(queue_mtx_);
// Signal to each worker that it should exit its loop once remaining tasks are finished
stop_ = true;
}
// Wake all threads to clean up
cv_.notify_all();
for (auto& t : workers_) {
t.join();
}
}
void
ThreadPool::Enqueue(Task&& task)
{
{
std::lock_guard<std::mutex> lk(queue_mtx_);
// Don't accept more work if pool is shutting down
if (stop_) {
return;
}
task_queue_.push(std::move(task));
}
// Only wake one thread per task
// Todo: DLIS-3859 if ThreadPool gets used more.
cv_.notify_one();
}
}} // namespace triton::common
#!/usr/bin/python
# Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import os
import subprocess
import yapf
FLAGS = None
FORMAT_EXTS = ('proto', 'cc', 'cu', 'h')
SKIP_PATHS = ('tools',)
def visit(path):
if FLAGS.verbose:
print("visiting " + path)
valid_ext = False
python_file = False
for ext in FORMAT_EXTS:
if path.endswith('.' + ext):
valid_ext = True
break
if path.endswith('.py'):
valid_ext = True
python_file = True
if not valid_ext:
if FLAGS.verbose:
print("skipping due to extension: " + path)
return True
for skip in SKIP_PATHS:
if path.startswith(skip):
if FLAGS.verbose:
print("skipping due to path prefix: " + path)
return True
if python_file:
yapf.yapflib.yapf_api.FormatFile(path,
in_place=True,
style_config='google')
return True
else:
args = ['clang-format-6.0', '--style=file', '-i']
if FLAGS.verbose:
args.append('-verbose')
args.append(path)
ret = subprocess.call(args)
if ret != 0:
print("format failed for " + path)
return False
return True
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-v',
'--verbose',
action="store_true",
required=False,
default=False,
help='Enable verbose output')
parser.add_argument('paths',
type=str,
nargs='*',
default=None,
help='Directories or files to format')
FLAGS = parser.parse_args()
# Check the version of yapf. A consistent version
# of yapf is needed to prevent unnecessary changes in the code.
if (yapf.__version__ != '0.30.0'):
print("Needs yapf 0.30.0, but got yapf {}".format(yapf.__version__))
if (FLAGS.paths is None) or (len(FLAGS.paths) == 0):
parser.print_help()
exit(1)
ret = True
for path in FLAGS.paths:
if not os.path.isdir(path):
if not visit(path):
ret = False
else:
for root, dirs, files in os.walk(path):
for name in files:
if not visit(os.path.join(root, name)):
ret = False
exit(0 if ret else 1)
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
###############################################################################
#
# Git pre-commit hook for Triton related projects
#
# To install this hook for a project, copy "pre-commit" and "format.py" into
# ".git/hooks/" directory of the project
#
###############################################################################
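# For example, from the root of the project being set up (the paths to the
# hook files below are illustrative):
#   cp pre-commit format.py <project>/.git/hooks/
#   chmod +x <project>/.git/hooks/pre-commit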
###############################################################################
#
# Run formatter script
#
###############################################################################
# Repo root
GIT_REPO_ROOT=$(git rev-parse --show-toplevel)
PYTHON_CMD=python3
FORMATTER_PY=${GIT_REPO_ROOT}/.git/hooks/format.py
CHANGED_FILES="$(git --no-pager diff --name-status --no-color --cached | awk '{ if (match($1, /R[0-9]+/)) { print $3 } else if ($1 != "D") { print $2 } }')"
echo "Running Python auto-format..."
for CHANGED_FILE in $CHANGED_FILES;
do
${PYTHON_CMD} ${FORMATTER_PY} ${GIT_REPO_ROOT}/${CHANGED_FILE}
git add ${GIT_REPO_ROOT}/${CHANGED_FILE}
done
---
BasedOnStyle: Google
IndentWidth: 2
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterStruct: false
AfterUnion: false
BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#cmake_minimum_required(VERSION 3.18)
cmake_minimum_required(VERSION 3.16)
project(tritoncore LANGUAGES C CXX)
# Control building of the shared library vs. only the headers and stub. By
# default only the headers and the library stub are built. Set
# TRITON_CORE_HEADERS_ONLY=OFF to also build libtritonserver.so.
option(TRITON_CORE_HEADERS_ONLY "Build only headers and stub" ON)
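# For example, to also build libtritonserver.so (matching the build command
# shown in the README):
#   cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_CORE_HEADERS_ONLY=OFF ..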
#
# Triton Server API
#
add_library(
triton-core-serverapi INTERFACE
)
add_library(
TritonCore::triton-core-serverapi ALIAS triton-core-serverapi
)
target_include_directories(
triton-core-serverapi
INTERFACE
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
#
# Triton Backend API
#
add_library(
triton-core-backendapi INTERFACE
)
add_library(
TritonCore::triton-core-backendapi ALIAS triton-core-backendapi
)
target_include_directories(
triton-core-backendapi
INTERFACE
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
#
# Triton RepoAgent API
#
add_library(
triton-core-repoagentapi INTERFACE
)
add_library(
TritonCore::triton-core-repoagentapi ALIAS triton-core-repoagentapi
)
target_include_directories(
triton-core-repoagentapi
INTERFACE
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
#
# Stub library for libtritonserver.so that stubs Triton Server API and
# Triton Backend API
#
add_library(
triton-core-serverstub SHARED
${CMAKE_CURRENT_SOURCE_DIR}/src/tritonserver_stub.cc
)
add_library(
TritonCore::triton-core-serverstub ALIAS triton-core-serverstub
)
target_compile_features(triton-core-serverstub PRIVATE cxx_std_11)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
message("Using MSVC as compiler, default target on Windows 10. "
"If the target system is not Windows 10, please update _WIN32_WINNT "
"to corresponding value.")
target_compile_options(
triton-core-serverstub
PRIVATE
/Wall /D_WIN32_WINNT=0x0A00 /EHsc
)
else()
target_compile_options(
triton-core-serverstub
PRIVATE
-Wall -Wextra -Wno-unused-parameter -Werror
)
endif()
set_target_properties(
triton-core-serverstub
PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME tritonserver
)
#
# Shared library implementing Triton Server API
#
if(NOT TRITON_CORE_HEADERS_ONLY)
include(CMakeDependentOption)
set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" )
option(TRITON_ENABLE_LOGGING "Include logging support in server" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in server" ON)
option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF)
option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in server" ON)
option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF)
set(TRITON_MIN_COMPUTE_CAPABILITY "6.0" CACHE STRING
"The minimum CUDA compute capability supported by Triton" )
set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build")
# Ensemble
option(TRITON_ENABLE_ENSEMBLE "Include ensemble support in server" OFF)
# Metrics
option(TRITON_ENABLE_METRICS "Include metrics support in server" ON)
option(TRITON_ENABLE_METRICS_GPU "Include GPU metrics support in server" ON)
option(TRITON_ENABLE_METRICS_CPU "Include CPU metrics support in server" ON)
# Cloud storage
option(TRITON_ENABLE_GCS "Include GCS Filesystem support in server" OFF)
option(TRITON_ENABLE_S3 "Include S3 Filesystem support in server" OFF)
option(TRITON_ENABLE_AZURE_STORAGE "Include Azure Storage Filesystem support in server" OFF)
# Repo tags
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/third_party repo")
# Third-party location
set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build")
set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source")
if(TRITON_ENABLE_METRICS AND NOT TRITON_ENABLE_STATS)
message(FATAL_ERROR "TRITON_ENABLE_METRICS=ON requires TRITON_ENABLE_STATS=ON")
endif()
if(TRITON_ENABLE_TRACING AND NOT TRITON_ENABLE_STATS)
message(FATAL_ERROR "TRITON_ENABLE_TRACING=ON requires TRITON_ENABLE_STATS=ON")
endif()
if (TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS)
message(FATAL_ERROR "TRITON_ENABLE_METRICS_CPU=ON requires TRITON_ENABLE_METRICS=ON")
endif()
if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS)
message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_METRICS=ON")
endif()
if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_GPU)
message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_GPU=ON")
endif()
include(FetchContent)
FetchContent_Declare(
repo-third-party
GIT_REPOSITORY https://github.com/triton-inference-server/third_party.git
GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
)
FetchContent_MakeAvailable(repo-third-party)
# Need to use ExternalProject for our builds so that we can get the
# correct dependencies between Triton shared library components and
# the ExternalProject dependencies (found in the third_party repo)
include(ExternalProject)
# If CMAKE_TOOLCHAIN_FILE is set, propagate that hint path to the external
# projects.
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
if (CMAKE_TOOLCHAIN_FILE)
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
endif()
# If VCPKG_TARGET_TRIPLET is set, propagate that hint path to the external
# projects.
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
if (VCPKG_TARGET_TRIPLET)
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
endif()
# If OPENSSL_ROOT_DIR is set, propagate that hint path to the external
# projects with OpenSSL dependency.
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
if (OPENSSL_ROOT_DIR)
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
endif()
# Location where protobuf-config.cmake will be installed varies by
# platform
if (WIN32)
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
else()
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/lib/cmake/protobuf")
endif()
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)
else()
set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
endif()
set(TRITON_DEPENDS googletest protobuf)
if(${TRITON_ENABLE_GCS})
set(TRITON_DEPENDS ${TRITON_DEPENDS} google-cloud-cpp)
endif() # TRITON_ENABLE_GCS
if(${TRITON_ENABLE_S3})
set(TRITON_DEPENDS ${TRITON_DEPENDS} aws-sdk-cpp)
endif() # TRITON_ENABLE_S3
if(${TRITON_ENABLE_AZURE_STORAGE})
set(TRITON_DEPENDS ${TRITON_DEPENDS} azure-storage-cpplite)
endif() # TRITON_ENABLE_AZURE_STORAGE
if(${TRITON_ENABLE_METRICS})
set(TRITON_DEPENDS ${TRITON_DEPENDS} prometheus-cpp)
endif() # TRITON_ENABLE_METRICS
if(${TRITON_ENABLE_GPU})
set(TRITON_DEPENDS ${TRITON_DEPENDS} cnmem)
endif() # TRITON_ENABLE_GPU
ExternalProject_Add(triton-core
PREFIX triton-core
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-core"
CMAKE_CACHE_ARGS
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/lib/cmake/c-ares
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/lib/cmake/absl
-Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/lib/cmake/nlohmann_json
-Dprometheus-cpp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/prometheus-cpp/lib/cmake/prometheus-cpp
-Dgoogle_cloud_cpp_storage_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_storage
-Dgoogle_cloud_cpp_rest_internal_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_rest_internal
-Dazure-storage-cpplite_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/azure-storage-cpplite
-Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_common
-DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/lib/cmake/Crc32c
-DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/AWSSDK
-Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/aws-cpp-sdk-core
-Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/aws-cpp-sdk-s3
-Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-c-event-stream/cmake
-Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-c-common/cmake
-Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-checksums/cmake
-DCNMEM_PATH:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/cnmem
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS}
-DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX}
-DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING}
-DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING}
-DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU}
-DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY}
-DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS}
-DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU}
-DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU}
-DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS}
-DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE}
-DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3}
-DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
DEPENDS ${TRITON_DEPENDS}
)
endif() # NOT TRITON_CORE_HEADERS_ONLY
#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonCore)
install(
TARGETS
triton-core-backendapi
triton-core-repoagentapi
triton-core-serverapi
EXPORT
triton-core-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
install(
TARGETS
triton-core-serverstub
EXPORT
triton-core-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs
)
install(
DIRECTORY include/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(
EXPORT
triton-core-targets
FILE
TritonCoreTargets.cmake
NAMESPACE
TritonCore::
DESTINATION
${INSTALL_CONFIGDIR}
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/TritonCoreConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/TritonCoreConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/TritonCoreConfig.cmake
DESTINATION
${INSTALL_CONFIGDIR}
)
#
# Export from build tree
#
export(
EXPORT
triton-core-targets
FILE
${CMAKE_CURRENT_BINARY_DIR}/TritonCoreTargets.cmake
NAMESPACE
TritonCore::
)
export(PACKAGE TritonCore)
Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
<!--
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
# Triton Inference Server Core
This repository holds the source code and headers for the library that
implements the core functionality of Triton. The *core* library can be
built as described below and used directly via its [C
API](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#in-process-triton-server-api). To
be useful, the core library must be paired with one or more backends.
You can learn more about backends in the [backend
repo](https://github.com/triton-inference-server/backend).
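As a rough illustration of the in-process C API, the sketch below creates and
then shuts down a server instance. Error handling is omitted, the model
repository path is just a placeholder, and the exact signatures should be
taken from the tritonserver.h header in this repository rather than from this
snippet.
```
#include "triton/core/tritonserver.h"

int
main()
{
  // Create server options and point them at a (placeholder) model repository.
  TRITONSERVER_ServerOptions* options = nullptr;
  TRITONSERVER_ServerOptionsNew(&options);
  TRITONSERVER_ServerOptionsSetModelRepositoryPath(
      options, "/path/to/model/repository");

  // Create the in-process server and release the options.
  TRITONSERVER_Server* server = nullptr;
  TRITONSERVER_ServerNew(&server, options);
  TRITONSERVER_ServerOptionsDelete(options);

  // ... issue inference requests against `server` here ...

  // Shut down and release the server.
  TRITONSERVER_ServerStop(server);
  TRITONSERVER_ServerDelete(server);
  return 0;
}
```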
Typically you do not build or use the core library on its own, but as
part of the *tritonserver* executable. The *tritonserver* executable
is built in the [server
repo](https://github.com/triton-inference-server/server) as described
in the [server build
documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md).
Ask questions or report problems in the main Triton [issues
page](https://github.com/triton-inference-server/server/issues).
## Build the Triton Core Library
Before building the Triton core library, your build system must have
the required dependencies installed as described in the [build
documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md). For
example, if you are building the core library with GPU support
(-DTRITON_ENABLE_GPU=ON), then you must install the CUDA, cuDNN, and
TensorRT dependencies required for the version of Triton you are
building.
To build, first clone the release branch matching the Triton release
you are interested in (*rxx.yy*), or the *main* branch to build the
top-of-tree. The Triton core library is built with CMake.
```
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_CORE_HEADERS_ONLY=OFF ..
$ make install
```
When the build completes, the install directory will contain the
Triton core shared library (install/lib/libtritonserver.so on Linux,
install/bin/tritonserver.dll on Windows), and the core library header
files in install/include/triton/core.
### Build a Release Branch
The following required Triton repositories will be pulled and used in
the build. By default, the "main" branch/tag will be used for each
repo, but each can be overridden with the CMake argument listed below.
* triton-inference-server/third_party: -DTRITON_THIRD_PARTY_REPO_TAG=[tag]
* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag]
You will need to override these tags if you are building from a release
branch. For example, if you are building the r22.03 version of Triton,
you would clone the r22.03 branch of the core repo and use the
following cmake command.
```
$ cmake -DTRITON_THIRD_PARTY_REPO_TAG=r22.03 -DTRITON_COMMON_REPO_TAG=r22.03 -DTRITON_CORE_HEADERS_ONLY=OFF ..
```
### Build Options
The [CMakeLists.txt](CMakeLists.txt) file contains the options
available when building the core library. For example, to build the
core library with the default settings plus S3 cloud storage and
ensembling support, use the following command.
```
$ cmake -DTRITON_CORE_HEADERS_ONLY=OFF -DTRITON_ENABLE_S3=ON -DTRITON_ENABLE_ENSEMBLE=ON ..
```