guobj / Qwen_lmdeploy / Commits

Commit fcefbf3d
Authored Nov 30, 2023 by xiabo
Parent: d592fbea

Commit message: Reorganize the project (重新整理工程)

Changes: 170
Showing 20 changed files with 0 additions and 6049 deletions (+0 -6049)
+0 -205    3rdparty/core-r22.12/src/infer_trace.h
+0 -99     3rdparty/core-r22.12/src/instance_queue.cc
+0 -57     3rdparty/core-r22.12/src/instance_queue.h
+0 -95     3rdparty/core-r22.12/src/label_provider.cc
+0 -65     3rdparty/core-r22.12/src/label_provider.h
+0 -32     3rdparty/core-r22.12/src/libtritonserver.ldscript
+0 -238    3rdparty/core-r22.12/src/memory.cc
+0 -174    3rdparty/core-r22.12/src/memory.h
+0 -321    3rdparty/core-r22.12/src/metric_family.cc
+0 -111    3rdparty/core-r22.12/src/metric_family.h
+0 -168    3rdparty/core-r22.12/src/metric_model_reporter.cc
+0 -138    3rdparty/core-r22.12/src/metric_model_reporter.h
+0 -1035   3rdparty/core-r22.12/src/metrics.cc
+0 -335    3rdparty/core-r22.12/src/metrics.h
+0 -137    3rdparty/core-r22.12/src/model.cc
+0 -162    3rdparty/core-r22.12/src/model.h
+0 -61     3rdparty/core-r22.12/src/model_config_cuda.cc
+0 -40     3rdparty/core-r22.12/src/model_config_cuda.h
+0 -2294   3rdparty/core-r22.12/src/model_config_utils.cc
+0 -282    3rdparty/core-r22.12/src/model_config_utils.h
Too many changes to show. To preserve performance only 170 of 170+ files are displayed.
3rdparty/core-r22.12/src/infer_trace.h  deleted  100644 → 0
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <atomic>
#include <chrono>
#include <memory>
#include "constants.h"
#include "status.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

#ifdef TRITON_ENABLE_TRACING

//
// InferenceTrace
//
// Interface to TRITONSERVER_InferenceTrace to report trace events.
//
class InferenceTrace {
 public:
  InferenceTrace(
      const TRITONSERVER_InferenceTraceLevel level, const uint64_t parent_id,
      TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
      TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
      TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* userp)
      : level_(level), id_(next_id_++), parent_id_(parent_id),
        activity_fn_(activity_fn), tensor_activity_fn_(tensor_activity_fn),
        release_fn_(release_fn), userp_(userp)
  {
  }

  InferenceTrace* SpawnChildTrace();

  int64_t Id() const { return id_; }
  int64_t ParentId() const { return parent_id_; }

  const std::string& ModelName() const { return model_name_; }
  int64_t ModelVersion() const { return model_version_; }

  void SetModelName(const std::string& n) { model_name_ = n; }
  void SetModelVersion(int64_t v) { model_version_ = v; }

  // Report trace activity.
  void Report(
      const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) {
      activity_fn_(
          reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), activity,
          timestamp_ns, userp_);
    }
  }

  // Report trace activity at the current time.
  void ReportNow(const TRITONSERVER_InferenceTraceActivity activity)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) {
      Report(
          activity,
          std::chrono::duration_cast<std::chrono::nanoseconds>(
              std::chrono::steady_clock::now().time_since_epoch())
              .count());
    }
  }

  // Report tensor trace activity.
  void ReportTensor(
      const TRITONSERVER_InferenceTraceActivity activity, const char* name,
      TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
      const int64_t* shape, uint64_t dim_count,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TENSORS) > 0) {
      tensor_activity_fn_(
          reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), activity, name,
          datatype, base, byte_size, shape, dim_count, memory_type,
          memory_type_id, userp_);
    }
  }

  // Release the trace. Call the trace release callback.
  void Release();

 private:
  const TRITONSERVER_InferenceTraceLevel level_;
  const uint64_t id_;
  const uint64_t parent_id_;

  TRITONSERVER_InferenceTraceActivityFn_t activity_fn_;
  TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn_;
  TRITONSERVER_InferenceTraceReleaseFn_t release_fn_;
  void* userp_;

  std::string model_name_;
  int64_t model_version_;

  // Maintain next id statically so that trace id is unique even
  // across traces
  static std::atomic<uint64_t> next_id_;
};

//
// InferenceTraceProxy
//
// Object attached as shared_ptr to InferenceRequest and
// InferenceResponse(s) being traced as part of a single inference
// request.
//
class InferenceTraceProxy {
 public:
  InferenceTraceProxy(InferenceTrace* trace) : trace_(trace) {}
  ~InferenceTraceProxy() { trace_->Release(); }

  int64_t Id() const { return trace_->Id(); }
  int64_t ParentId() const { return trace_->ParentId(); }
  const std::string& ModelName() const { return trace_->ModelName(); }
  int64_t ModelVersion() const { return trace_->ModelVersion(); }
  void SetModelName(const std::string& n) { trace_->SetModelName(n); }
  void SetModelVersion(int64_t v) { trace_->SetModelVersion(v); }

  void Report(
      const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
  {
    trace_->Report(activity, timestamp_ns);
  }

  void ReportNow(const TRITONSERVER_InferenceTraceActivity activity)
  {
    trace_->ReportNow(activity);
  }

  void ReportTensor(
      const TRITONSERVER_InferenceTraceActivity activity, const char* name,
      TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
      const int64_t* shape, uint64_t dim_count,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
  {
    trace_->ReportTensor(
        activity, name, datatype, base, byte_size, shape, dim_count,
        memory_type, memory_type_id);
  }

  std::shared_ptr<InferenceTraceProxy> SpawnChildTrace();

 private:
  InferenceTrace* trace_;
};

#endif  // TRITON_ENABLE_TRACING
//
// Macros to generate trace activity
//
#ifdef TRITON_ENABLE_TRACING
#define INFER_TRACE_ACTIVITY(T, A, TS_NS) \
{ \
const auto& trace = (T); \
const auto ts_ns = (TS_NS); \
if (trace != nullptr) { \
trace->Report(A, ts_ns); \
} \
}
#define INFER_TRACE_ACTIVITY_NOW(T, A) \
{ \
const auto& trace = (T); \
if (trace != nullptr) { \
trace->ReportNow(A); \
} \
}
#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI) \
{ \
const auto& trace = (T); \
if (trace != nullptr) { \
trace->ReportTensor(A, N, D, BA, BY, S, DI, MT, MTI); \
} \
}
#else
#define INFER_TRACE_ACTIVITY(T, A, TS_NS)
#define INFER_TRACE_ACTIVITY_NOW(T, A)
#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI)
#endif // TRITON_ENABLE_TRACING
}}  // namespace triton::core
3rdparty/core-r22.12/src/instance_queue.cc  deleted  100644 → 0
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "instance_queue.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

InstanceQueue::InstanceQueue(
    size_t max_batch_size, uint64_t max_queue_delay_ns)
    : max_batch_size_(max_batch_size), max_queue_delay_ns_(max_queue_delay_ns)
{
}

size_t
InstanceQueue::Size()
{
  return payload_queue_.size();
}

bool
InstanceQueue::Empty()
{
  return payload_queue_.empty();
}

void
InstanceQueue::Enqueue(const std::shared_ptr<Payload>& payload)
{
  payload_queue_.push_back(payload);
}

void
InstanceQueue::Dequeue(
    std::shared_ptr<Payload>* payload,
    std::vector<std::shared_ptr<Payload>>* merged_payloads)
{
  *payload = payload_queue_.front();
  payload_queue_.pop_front();
  {
    std::lock_guard<std::mutex> exec_lock(*((*payload)->GetExecMutex()));
    (*payload)->SetState(Payload::State::EXECUTING);
    if ((!payload_queue_.empty()) && (max_queue_delay_ns_ > 0) &&
        (max_batch_size_ > 1) && (!(*payload)->IsSaturated())) {
      bool continue_merge;
      do {
        continue_merge = false;
        uint64_t now_ns =
            std::chrono::duration_cast<std::chrono::nanoseconds>(
                std::chrono::steady_clock::now().time_since_epoch())
                .count();
        size_t batch_size = (*payload)->BatchSize();
        if ((!payload_queue_.empty()) &&
            (!payload_queue_.front()->IsSaturated()) &&
            (now_ns - payload_queue_.front()->BatcherStartNs()) >
                max_queue_delay_ns_) {
          std::lock_guard<std::mutex> exec_lock(
              *(payload_queue_.front()->GetExecMutex()));
          payload_queue_.front()->SetState(Payload::State::EXECUTING);
          size_t front_batch_size = payload_queue_.front()->BatchSize();
          if ((batch_size + front_batch_size) <= max_batch_size_) {
            const auto& status =
                (*payload)->MergePayload(payload_queue_.front());
            if (status.IsOk()) {
              merged_payloads->push_back(payload_queue_.front());
              payload_queue_.pop_front();
              continue_merge = true;
            }
          }
        }
      } while (continue_merge);
    }
  }
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/instance_queue.h  deleted  100644 → 0
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "payload.h"
namespace triton { namespace core {

//
// InstanceQueue
//
// A queue implementation holding Payloads ready to be scheduled on
// model instance.
class InstanceQueue {
 public:
  explicit InstanceQueue(size_t max_batch_size, uint64_t max_queue_delay_ns);

  size_t Size();
  bool Empty();
  void Enqueue(const std::shared_ptr<Payload>& payload);
  void Dequeue(
      std::shared_ptr<Payload>* payload,
      std::vector<std::shared_ptr<Payload>>* merged_payloads);

 private:
  size_t max_batch_size_;
  uint64_t max_queue_delay_ns_;

  std::deque<std::shared_ptr<Payload>> payload_queue_;
  std::shared_ptr<Payload> staged_payload_;
  std::mutex mu_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/label_provider.cc  deleted  100644 → 0
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "label_provider.h"
#include <iostream>
#include <iterator>
#include <sstream>
#include "filesystem.h"
namespace triton { namespace core {

const std::string&
LabelProvider::GetLabel(const std::string& name, size_t index) const
{
  static const std::string not_found;

  auto itr = label_map_.find(name);
  if (itr == label_map_.end()) {
    return not_found;
  }

  if (itr->second.size() <= index) {
    return not_found;
  }

  return itr->second[index];
}

Status
LabelProvider::AddLabels(const std::string& name, const std::string& filepath)
{
  std::string label_file_contents;
  RETURN_IF_ERROR(ReadTextFile(filepath, &label_file_contents));

  auto p = label_map_.insert(std::make_pair(name, std::vector<std::string>()));
  if (!p.second) {
    return Status(
        Status::Code::INTERNAL, "multiple label files for '" + name + "'");
  }

  auto itr = p.first;

  std::istringstream label_file_stream(label_file_contents);
  std::string line;
  while (std::getline(label_file_stream, line)) {
    itr->second.push_back(line);
  }

  return Status::Success;
}

const std::vector<std::string>&
LabelProvider::GetLabels(const std::string& name)
{
  static const std::vector<std::string> not_found;
  auto itr = label_map_.find(name);
  if (itr == label_map_.end()) {
    return not_found;
  }
  return itr->second;
}

Status
LabelProvider::AddLabels(
    const std::string& name, const std::vector<std::string>& labels)
{
  label_map_.emplace(name, labels);
  return Status::Success;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/label_provider.h  deleted  100644 → 0
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "constants.h"
#include "status.h"
namespace triton { namespace core {

// Provides classification labels.
class LabelProvider {
 public:
  LabelProvider() = default;

  // Return the label associated with 'name' for a given
  // 'index'. Return empty string if no label is available.
  const std::string& GetLabel(const std::string& name, size_t index) const;

  // Associate with 'name' a set of labels initialized from a given
  // 'filepath'. Within the file each label is specified on its own
  // line. The first label (line 0) is the index-0 label, the second
  // label (line 1) is the index-1 label, etc.
  Status AddLabels(const std::string& name, const std::string& filepath);

  // Return the labels associated with 'name'. Return empty vector if no labels
  // are available.
  const std::vector<std::string>& GetLabels(const std::string& name);

  // Associate with 'name' a set of 'labels'
  Status AddLabels(
      const std::string& name, const std::vector<std::string>& labels);

 private:
  DISALLOW_COPY_AND_ASSIGN(LabelProvider);

  std::unordered_map<std::string, std::vector<std::string>> label_map_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/libtritonserver.ldscript  deleted  100644 → 0
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
  global:
    TRITONSERVER_*;
    TRITONBACKEND_*;
    TRITONREPOAGENT_*;
  local: *;
};
3rdparty/core-r22.12/src/memory.cc  deleted  100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "memory.h"
#include "pinned_memory_manager.h"
#include "triton/common/logging.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#include "cuda_memory_manager.h"
#endif // TRITON_ENABLE_GPU
namespace triton { namespace core {

//
// MemoryReference
//
MemoryReference::MemoryReference() : Memory() {}

const char*
MemoryReference::BufferAt(
    size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id) const
{
  if (idx >= buffer_.size()) {
    *byte_size = 0;
    *memory_type = TRITONSERVER_MEMORY_CPU;
    *memory_type_id = 0;
    return nullptr;
  }
  *memory_type = buffer_[idx].buffer_attributes_.MemoryType();
  *memory_type_id = buffer_[idx].buffer_attributes_.MemoryTypeId();
  *byte_size = buffer_[idx].buffer_attributes_.ByteSize();
  return buffer_[idx].buffer_;
}

const char*
MemoryReference::BufferAt(size_t idx, BufferAttributes** buffer_attributes)
{
  if (idx >= buffer_.size()) {
    *buffer_attributes = nullptr;
    return nullptr;
  }

  *buffer_attributes = &(buffer_[idx].buffer_attributes_);
  return buffer_[idx].buffer_;
}

size_t
MemoryReference::AddBuffer(
    const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  total_byte_size_ += byte_size;
  buffer_count_++;
  buffer_.emplace_back(buffer, byte_size, memory_type, memory_type_id);
  return buffer_.size() - 1;
}

size_t
MemoryReference::AddBuffer(
    const char* buffer, BufferAttributes* buffer_attributes)
{
  total_byte_size_ += buffer_attributes->ByteSize();
  buffer_count_++;
  buffer_.emplace_back(buffer, buffer_attributes);
  return buffer_.size() - 1;
}

size_t
MemoryReference::AddBufferFront(
    const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  total_byte_size_ += byte_size;
  buffer_count_++;
  buffer_.emplace(
      buffer_.begin(), buffer, byte_size, memory_type, memory_type_id);
  return buffer_.size() - 1;
}

//
// MutableMemory
//
MutableMemory::MutableMemory(
    char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
    : Memory(), buffer_(buffer),
      buffer_attributes_(
          BufferAttributes(byte_size, memory_type, memory_type_id, nullptr))
{
  total_byte_size_ = byte_size;
  buffer_count_ = (byte_size == 0) ? 0 : 1;
}

const char*
MutableMemory::BufferAt(
    size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id) const
{
  if (idx != 0) {
    *byte_size = 0;
    *memory_type = TRITONSERVER_MEMORY_CPU;
    *memory_type_id = 0;
    return nullptr;
  }
  *byte_size = total_byte_size_;
  *memory_type = buffer_attributes_.MemoryType();
  *memory_type_id = buffer_attributes_.MemoryTypeId();
  return buffer_;
}

const char*
MutableMemory::BufferAt(size_t idx, BufferAttributes** buffer_attributes)
{
  if (idx != 0) {
    *buffer_attributes = nullptr;
    return nullptr;
  }

  *buffer_attributes = &buffer_attributes_;
  return buffer_;
}

char*
MutableMemory::MutableBuffer(
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  if (memory_type != nullptr) {
    *memory_type = buffer_attributes_.MemoryType();
  }
  if (memory_type_id != nullptr) {
    *memory_type_id = buffer_attributes_.MemoryTypeId();
  }

  return buffer_;
}

//
// AllocatedMemory
//
AllocatedMemory::AllocatedMemory(
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
    : MutableMemory(nullptr, byte_size, memory_type, memory_type_id)
{
  if (total_byte_size_ != 0) {
    // Allocate memory with the following fallback policy:
    // CUDA memory -> pinned system memory -> non-pinned system memory
    switch (buffer_attributes_.MemoryType()) {
#ifdef TRITON_ENABLE_GPU
      case TRITONSERVER_MEMORY_GPU: {
        auto status = CudaMemoryManager::Alloc(
            (void**)&buffer_, total_byte_size_,
            buffer_attributes_.MemoryTypeId());
        if (!status.IsOk()) {
          static bool warning_logged = false;
          if (!warning_logged) {
            LOG_WARNING << status.Message()
                        << ", falling back to pinned system memory";
            warning_logged = true;
          }
          goto pinned_memory_allocation;
        }
        break;
      }
    pinned_memory_allocation:
#endif  // TRITON_ENABLE_GPU
      default: {
        TRITONSERVER_MemoryType memory_type = buffer_attributes_.MemoryType();
        auto status = PinnedMemoryManager::Alloc(
            (void**)&buffer_, total_byte_size_, &memory_type, true);
        buffer_attributes_.SetMemoryType(memory_type);
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
          buffer_ = nullptr;
        }
        break;
      }
    }
  }
  total_byte_size_ = (buffer_ == nullptr) ? 0 : total_byte_size_;
}

AllocatedMemory::~AllocatedMemory()
{
  if (buffer_ != nullptr) {
    switch (buffer_attributes_.MemoryType()) {
      case TRITONSERVER_MEMORY_GPU: {
#ifdef TRITON_ENABLE_GPU
        auto status =
            CudaMemoryManager::Free(buffer_, buffer_attributes_.MemoryTypeId());
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
        }
#endif  // TRITON_ENABLE_GPU
        break;
      }
      default: {
        auto status = PinnedMemoryManager::Free(buffer_);
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
          buffer_ = nullptr;
        }
        break;
      }
    }
    buffer_ = nullptr;
  }
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/memory.h  deleted  100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <vector>
#include "buffer_attributes.h"
#include "constants.h"
#include "status.h"
namespace triton { namespace core {

//
// Memory used to access data in inference requests
//
class Memory {
 public:
  // Get the 'idx'-th data block in the buffer. Using index to avoid
  // maintaining internal state such that one buffer can be shared
  // across multiple providers.
  // 'idx' zero base index. Valid indices are continuous.
  // 'byte_size' returns the byte size of the chunk of bytes.
  // 'memory_type' returns the memory type of the chunk of bytes.
  // 'memory_type_id' returns the memory type id of the chunk of bytes.
  // Return the pointer to the data block. Returns nullptr if 'idx' is
  // out of range
  virtual const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const = 0;

  // Similar to the above BufferAt but with BufferAttributes.
  virtual const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) = 0;

  // Get the number of contiguous buffers composing the memory.
  size_t BufferCount() const { return buffer_count_; }

  // Return the total byte size of the data buffer
  size_t TotalByteSize() const { return total_byte_size_; }

 protected:
  Memory() : total_byte_size_(0), buffer_count_(0) {}

  size_t total_byte_size_;
  size_t buffer_count_;
};

//
// MemoryReference
//
class MemoryReference : public Memory {
 public:
  // Create a read-only data buffer as a reference to other data buffer
  MemoryReference();

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const override;
  const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) override;

  // Add a 'buffer' with 'byte_size' as part of this data buffer
  // Return the index of the buffer
  size_t AddBuffer(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id);
  size_t AddBuffer(const char* buffer, BufferAttributes* buffer_attributes);

  // Add a 'buffer' with 'byte_size' as part of this data buffer in the front
  // Return the index of the buffer
  size_t AddBufferFront(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id);

 private:
  struct Block {
    Block(
        const char* buffer, size_t byte_size,
        TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
        : buffer_(buffer),
          buffer_attributes_(
              BufferAttributes(byte_size, memory_type, memory_type_id, nullptr))
    {
    }

    Block(const char* buffer, BufferAttributes* buffer_attributes)
        : buffer_(buffer), buffer_attributes_(*buffer_attributes)
    {
    }

    const char* buffer_;
    BufferAttributes buffer_attributes_;
  };

  std::vector<Block> buffer_;
};

//
// MutableMemory
//
class MutableMemory : public Memory {
 public:
  // Create a mutable data buffer referencing to other data buffer.
  MutableMemory(
      char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id);
  virtual ~MutableMemory() {}

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const override;

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) override;

  // Return a pointer to the base address of the mutable buffer. If
  // non-null 'memory_type' returns the memory type of the chunk of
  // bytes. If non-null 'memory_type_id' returns the memory type id of
  // the chunk of bytes.
  char* MutableBuffer(
      TRITONSERVER_MemoryType* memory_type = nullptr,
      int64_t* memory_type_id = nullptr);

  DISALLOW_COPY_AND_ASSIGN(MutableMemory);

 protected:
  MutableMemory() : Memory() {}

  char* buffer_;
  BufferAttributes buffer_attributes_;
};

//
// AllocatedMemory
//
class AllocatedMemory : public MutableMemory {
 public:
  // Create a continuous data buffer with 'byte_size', 'memory_type' and
  // 'memory_type_id'. Note that the buffer may be created on different memeory
  // type and memory type id if the original request type and id can not be
  // satisfied, thus the function caller should always check the actual memory
  // type and memory type id before use.
  AllocatedMemory(
      size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id);
  ~AllocatedMemory() override;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/metric_family.cc  deleted  100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef TRITON_ENABLE_METRICS
#include "metric_family.h"
#include "metrics.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

//
// Implementation for TRITONSERVER_MetricFamily.
//
MetricFamily::MetricFamily(
    TRITONSERVER_MetricKind kind, const char* name, const char* description)
{
  auto registry = Metrics::GetRegistry();
  switch (kind) {
    case TRITONSERVER_METRIC_KIND_COUNTER:
      family_ = reinterpret_cast<void*>(&prometheus::BuildCounter()
                                             .Name(name)
                                             .Help(description)
                                             .Register(*registry));
      break;
    case TRITONSERVER_METRIC_KIND_GAUGE:
      family_ = reinterpret_cast<void*>(&prometheus::BuildGauge()
                                             .Name(name)
                                             .Help(description)
                                             .Register(*registry));
      break;
    default:
      throw std::invalid_argument(
          "Unsupported kind passed to MetricFamily constructor.");
  }

  kind_ = kind;
}

void*
MetricFamily::Add(std::map<std::string, std::string> label_map, Metric* metric)
{
  void* prom_metric = nullptr;
  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
      auto counter_ptr = &counter_family_ptr->Add(label_map);
      prom_metric = reinterpret_cast<void*>(counter_ptr);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
      auto gauge_ptr = &gauge_family_ptr->Add(label_map);
      prom_metric = reinterpret_cast<void*>(gauge_ptr);
      break;
    }
    default:
      throw std::invalid_argument(
          "Unsupported family kind passed to Metric constructor.");
  }

  std::lock_guard<std::mutex> lk(metric_mtx_);
  ++prom_metric_ref_cnt_[prom_metric];
  child_metrics_.insert(metric);
  return prom_metric;
}

void
MetricFamily::Remove(void* prom_metric, Metric* metric)
{
  {
    // Remove reference to dependent Metric object
    std::lock_guard<std::mutex> lk(metric_mtx_);
    child_metrics_.erase(metric);
  }

  if (prom_metric == nullptr) {
    return;
  }

  {
    std::lock_guard<std::mutex> lk(metric_mtx_);
    const auto it = prom_metric_ref_cnt_.find(prom_metric);
    if (it != prom_metric_ref_cnt_.end()) {
      --it->second;
      if (it->second == 0) {
        prom_metric_ref_cnt_.erase(it);
      } else {
        // Done as it is not the last reference
        return;
      }
    }
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(prom_metric);
      counter_family_ptr->Remove(counter_ptr);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(prom_metric);
      gauge_family_ptr->Remove(gauge_ptr);
      break;
    }
    default:
      // Invalid kind should be caught in constructor
      LOG_ERROR << "Unsupported kind in Metric destructor.";
      break;
  }
}

void
MetricFamily::InvalidateReferences()
{
  std::lock_guard<std::mutex> lk(metric_mtx_);
  for (auto& metric : child_metrics_) {
    if (metric != nullptr) {
      metric->Invalidate();
    }
  }
  child_metrics_.clear();
}

MetricFamily::~MetricFamily()
{
  if (NumMetrics() > 0) {
    LOG_WARNING << "MetricFamily was deleted before its child Metrics, this "
                   "should not happen. Make sure to delete all child Metrics "
                   "before deleting their MetricFamily.";
  }

  InvalidateReferences();
  // DLIS-4072: Support for removing metric families from registry
}

//
// Implementation for TRITONSERVER_Metric.
//
Metric::Metric(
    TRITONSERVER_MetricFamily* family,
    std::vector<const InferenceParameter*> labels)
{
  family_ = reinterpret_cast<MetricFamily*>(family);
  kind_ = family_->Kind();

  // Create map of labels from InferenceParameters
  std::map<std::string, std::string> label_map;
  for (const auto& param : labels) {
    if (param->Type() != TRITONSERVER_PARAMETER_STRING) {
      throw std::invalid_argument(
          "Parameter [" + param->Name() +
          "] must have a type of TRITONSERVER_PARAMETER_STRING to be "
          "added as a label.");
    }

    label_map[param->Name()] =
        std::string(reinterpret_cast<const char*>(param->ValuePointer()));
  }

  metric_ = family_->Add(label_map, this);
}

Metric::~Metric()
{
  if (family_ != nullptr) {
    family_->Remove(metric_, this);
  } else {
    LOG_WARNING << "Corresponding MetricFamily was deleted before this Metric, "
                   "this should not happen. Make sure to delete a Metric "
                   "before deleting its MetricFamily.";
  }

  // Catch lifetime management / invalid reference issues
  Invalidate();
}

void
Metric::Invalidate()
{
  family_ = nullptr;
  metric_ = nullptr;
}

TRITONSERVER_Error*
Metric::Value(double* value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not get metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(metric_);
      LOG_VERBOSE(1) << "SETTING COUNTER METRIC FROM: " << *value << " to "
                     << counter_ptr->Value();
      *value = counter_ptr->Value();
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      LOG_VERBOSE(1) << "SETTING GAUGE METRIC FROM: " << *value << " to "
                     << gauge_ptr->Value();
      *value = gauge_ptr->Value();
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

TRITONSERVER_Error*
Metric::Increment(double value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not increment metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      if (value < 0.0) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "TRITONSERVER_METRIC_KIND_COUNTER can only be incremented "
            "monotonically by non-negative values.");
      }

      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(metric_);
      counter_ptr->Increment(value);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      // Gauge::Increment works for both positive and negative values as of
      // prometheus-cpp v1.0 but for now on v0.7 we defer call to
      // Increment/Decrement based on the sign of value
      // https://github.com/jupp0r/prometheus-cpp/blob/master/core/src/gauge.cc
      if (value < 0.0) {
        gauge_ptr->Decrement(-1.0 * value);
      } else {
        gauge_ptr->Increment(value);
      }
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

TRITONSERVER_Error*
Metric::Set(double value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not set metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "TRITONSERVER_METRIC_KIND_COUNTER does not support Set");
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      gauge_ptr->Set(value);
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metric_family.h  deleted  100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#ifdef TRITON_ENABLE_METRICS
#include <mutex>
#include <set>
#include <unordered_map>
#include "infer_parameter.h"
#include "prometheus/registry.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

//
// Implementation for TRITONSERVER_MetricFamily.
//
class Metric;
class MetricFamily {
 public:
  MetricFamily(
      TRITONSERVER_MetricKind kind, const char* name, const char* description);
  ~MetricFamily();

  void* Family() const { return family_; }
  TRITONSERVER_MetricKind Kind() const { return kind_; }

  void* Add(std::map<std::string, std::string> label_map, Metric* metric);
  void Remove(void* prom_metric, Metric* metric);

  int NumMetrics()
  {
    std::lock_guard<std::mutex> lk(metric_mtx_);
    return child_metrics_.size();
  }

 private:
  // If a MetricFamily is deleted before its dependent Metric, we want to
  // invalidate the reference so we don't access invalid memory.
  void InvalidateReferences();

  void* family_;
  TRITONSERVER_MetricKind kind_;
  // Synchronize access of related metric objects
  std::mutex metric_mtx_;
  // Prometheus returns the existing metric pointer if the metric with the same
  // set of labels are requested, as a result, different Metric objects may
  // refer to the same prometheus metric. So we must track the reference count
  // of the metric and request prometheus to remove it only when all references
  // are released.
  std::unordered_map<void*, size_t> prom_metric_ref_cnt_;
  // Maintain references to metrics created from this metric family to
  // invalidate their references if a family is deleted before its metric
  std::set<Metric*> child_metrics_;
};

//
// Implementation for TRITONSERVER_Metric.
//
class Metric {
 public:
  Metric(
      TRITONSERVER_MetricFamily* family,
      std::vector<const InferenceParameter*> labels);
  ~Metric();

  MetricFamily* Family() const { return family_; }
  TRITONSERVER_MetricKind Kind() const { return kind_; }

  TRITONSERVER_Error* Value(double* value);
  TRITONSERVER_Error* Increment(double value);
  TRITONSERVER_Error* Set(double value);

  // If a MetricFamily is deleted before its dependent Metric, we want to
  // invalidate the references so we don't access invalid memory.
  void Invalidate();

 private:
  void* metric_;
  MetricFamily* family_;
  TRITONSERVER_MetricKind kind_;
};

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metric_model_reporter.cc  deleted  100644 → 0
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "metric_model_reporter.h"
#ifdef TRITON_ENABLE_METRICS
#include "constants.h"
#include "metrics.h"
namespace triton { namespace core {

Status
MetricModelReporter::Create(
    const std::string& model_name, const int64_t model_version,
    const int device, const triton::common::MetricTagsMap& model_tags,
    std::shared_ptr<MetricModelReporter>* metric_model_reporter)
{
  static std::mutex mtx;
  static std::unordered_map<size_t, std::weak_ptr<MetricModelReporter>>
      reporter_map;

  std::map<std::string, std::string> labels;
  GetMetricLabels(&labels, model_name, model_version, device, model_tags);
  auto hash_labels = Metrics::HashLabels(labels);

  std::lock_guard<std::mutex> lock(mtx);

  const auto& itr = reporter_map.find(hash_labels);
  if (itr != reporter_map.end()) {
    // Found in map. If the weak_ptr is still valid that means that
    // there are other models using the reporter and we just reuse that
    // same reporter. If the weak_ptr is not valid then we need to remove
    // the weak_ptr from the map and create the reporter again.
    *metric_model_reporter = itr->second.lock();
    if (*metric_model_reporter != nullptr) {
      return Status::Success;
    }

    reporter_map.erase(itr);
  }

  metric_model_reporter->reset(
      new MetricModelReporter(model_name, model_version, device, model_tags));
  reporter_map.insert({hash_labels, *metric_model_reporter});
  return Status::Success;
}

MetricModelReporter::MetricModelReporter(
    const std::string& model_name, const int64_t model_version,
    const int device, const triton::common::MetricTagsMap& model_tags)
{
  std::map<std::string, std::string> labels;
  GetMetricLabels(&labels, model_name, model_version, device, model_tags);

  metric_inf_success_ =
      CreateCounterMetric(Metrics::FamilyInferenceSuccess(), labels);
  metric_inf_failure_ =
      CreateCounterMetric(Metrics::FamilyInferenceFailure(), labels);
  metric_inf_count_ =
      CreateCounterMetric(Metrics::FamilyInferenceCount(), labels);
  metric_inf_exec_count_ =
      CreateCounterMetric(Metrics::FamilyInferenceExecutionCount(), labels);
  metric_inf_request_duration_us_ =
      CreateCounterMetric(Metrics::FamilyInferenceRequestDuration(), labels);
  metric_inf_queue_duration_us_ =
      CreateCounterMetric(Metrics::FamilyInferenceQueueDuration(), labels);
  metric_inf_compute_input_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeInputDuration(), labels);
  metric_inf_compute_infer_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeInferDuration(), labels);
  metric_inf_compute_output_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeOutputDuration(), labels);
  metric_cache_hit_count_ =
      CreateCounterMetric(Metrics::FamilyCacheHitCount(), labels);
  metric_cache_hit_lookup_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheHitLookupDuration(), labels);
  metric_cache_miss_count_ =
      CreateCounterMetric(Metrics::FamilyCacheMissCount(), labels);
  metric_cache_miss_lookup_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheMissLookupDuration(), labels);
  metric_cache_miss_insertion_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheMissInsertionDuration(), labels);
}

MetricModelReporter::~MetricModelReporter()
{
  Metrics::FamilyInferenceSuccess().Remove(metric_inf_success_);
  Metrics::FamilyInferenceFailure().Remove(metric_inf_failure_);
  Metrics::FamilyInferenceCount().Remove(metric_inf_count_);
  Metrics::FamilyInferenceExecutionCount().Remove(metric_inf_exec_count_);
  Metrics::FamilyInferenceRequestDuration().Remove(
      metric_inf_request_duration_us_);
  Metrics::FamilyInferenceQueueDuration().Remove(metric_inf_queue_duration_us_);
  Metrics::FamilyInferenceComputeInputDuration().Remove(
      metric_inf_compute_input_duration_us_);
  Metrics::FamilyInferenceComputeInferDuration().Remove(
      metric_inf_compute_infer_duration_us_);
  Metrics::FamilyInferenceComputeOutputDuration().Remove(
      metric_inf_compute_output_duration_us_);
  Metrics::FamilyCacheHitCount().Remove(metric_cache_hit_count_);
  Metrics::FamilyCacheHitLookupDuration().Remove(
      metric_cache_hit_lookup_duration_us_);
  Metrics::FamilyCacheMissCount().Remove(metric_cache_miss_count_);
  Metrics::FamilyCacheMissInsertionDuration().Remove(
      metric_cache_miss_insertion_duration_us_);
}

void
MetricModelReporter::GetMetricLabels(
    std::map<std::string, std::string>* labels, const std::string& model_name,
    const int64_t model_version, const int device,
    const triton::common::MetricTagsMap& model_tags)
{
  labels->insert(std::map<std::string, std::string>::value_type(
      std::string(kMetricsLabelModelName), model_name));
  labels->insert(std::map<std::string, std::string>::value_type(
      std::string(kMetricsLabelModelVersion), std::to_string(model_version)));
  for (const auto& tag : model_tags) {
    labels->insert(std::map<std::string, std::string>::value_type(
        "_" + tag.first, tag.second));
  }

  // 'device' can be < 0 to indicate that the GPU is not known. In
  // that case use a metric that doesn't have the gpu_uuid label.
  if (device >= 0) {
    std::string uuid;
    if (Metrics::UUIDForCudaDevice(device, &uuid)) {
      labels->insert(std::map<std::string, std::string>::value_type(
          std::string(kMetricsLabelGpuUuid), uuid));
    }
  }
}

prometheus::Counter*
MetricModelReporter::CreateCounterMetric(
    prometheus::Family<prometheus::Counter>& family,
    const std::map<std::string, std::string>& labels)
{
  return &family.Add(labels);
}

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metric_model_reporter.h  deleted  100644 → 0
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "status.h"
#include "triton/common/model_config.h"
#ifdef TRITON_ENABLE_METRICS
#include "prometheus/registry.h"
#endif // TRITON_ENABLE_METRICS
namespace triton { namespace core {

//
// Interface for a metric reporter for a given version of a model.
//
class MetricModelReporter {
 public:
#ifdef TRITON_ENABLE_METRICS
  static Status Create(
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags,
      std::shared_ptr<MetricModelReporter>* metric_model_reporter);

  ~MetricModelReporter();

  // Get a metric for the given model, version and GPU index.
  prometheus::Counter& MetricInferenceSuccess() const { return *metric_inf_success_; }
  prometheus::Counter& MetricInferenceFailure() const { return *metric_inf_failure_; }
  prometheus::Counter& MetricInferenceCount() const { return *metric_inf_count_; }
  prometheus::Counter& MetricInferenceExecutionCount() const { return *metric_inf_exec_count_; }
  prometheus::Counter& MetricInferenceRequestDuration() const { return *metric_inf_request_duration_us_; }
  prometheus::Counter& MetricInferenceQueueDuration() const { return *metric_inf_queue_duration_us_; }
  prometheus::Counter& MetricInferenceComputeInputDuration() const { return *metric_inf_compute_input_duration_us_; }
  prometheus::Counter& MetricInferenceComputeInferDuration() const { return *metric_inf_compute_infer_duration_us_; }
  prometheus::Counter& MetricInferenceComputeOutputDuration() const { return *metric_inf_compute_output_duration_us_; }
  prometheus::Counter& MetricCacheHitCount() const { return *metric_cache_hit_count_; }
  prometheus::Counter& MetricCacheHitLookupDuration() const { return *metric_cache_hit_lookup_duration_us_; }
  prometheus::Counter& MetricCacheMissCount() const { return *metric_cache_miss_count_; }
  prometheus::Counter& MetricCacheMissLookupDuration() const { return *metric_cache_miss_lookup_duration_us_; }
  prometheus::Counter& MetricCacheMissInsertionDuration() const { return *metric_cache_miss_insertion_duration_us_; }

 private:
  MetricModelReporter(
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags);

  static void GetMetricLabels(
      std::map<std::string, std::string>* labels,
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags);
  prometheus::Counter* CreateCounterMetric(
      prometheus::Family<prometheus::Counter>& family,
      const std::map<std::string, std::string>& labels);

  prometheus::Counter* metric_inf_success_;
  prometheus::Counter* metric_inf_failure_;
  prometheus::Counter* metric_inf_count_;
  prometheus::Counter* metric_inf_exec_count_;
  prometheus::Counter* metric_inf_request_duration_us_;
  prometheus::Counter* metric_inf_queue_duration_us_;
  prometheus::Counter* metric_inf_compute_input_duration_us_;
  prometheus::Counter* metric_inf_compute_infer_duration_us_;
  prometheus::Counter* metric_inf_compute_output_duration_us_;
  prometheus::Counter* metric_cache_hit_count_;
  prometheus::Counter* metric_cache_hit_lookup_duration_us_;
  prometheus::Counter* metric_cache_miss_count_;
  prometheus::Counter* metric_cache_miss_lookup_duration_us_;
  prometheus::Counter* metric_cache_miss_insertion_duration_us_;
#endif  // TRITON_ENABLE_METRICS
};

}}  // namespace triton::core
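A hypothetical call site for this interface, to show how the accessors are meant to be used once Create() has produced a reporter for a (model, version, device) tuple. RecordSuccess, its parameters, and the duration values are illustrative assumptions, not part of the deleted sources; only the MetricInference*() accessors and prometheus::Counter::Increment() come from the code above:

// Minimal sketch, assuming the headers above are available.
void RecordSuccess(
    const std::shared_ptr<triton::core::MetricModelReporter>& reporter,
    uint64_t request_duration_us, uint64_t queue_duration_us)
{
#ifdef TRITON_ENABLE_METRICS
  if (reporter != nullptr) {
    reporter->MetricInferenceSuccess().Increment(1);
    reporter->MetricInferenceCount().Increment(1);
    reporter->MetricInferenceRequestDuration().Increment(request_duration_us);
    reporter->MetricInferenceQueueDuration().Increment(queue_duration_us);
  }
#endif  // TRITON_ENABLE_METRICS
}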
3rdparty/core-r22.12/src/metrics.cc  deleted 100644 → 0  @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifdef TRITON_ENABLE_METRICS
#include "metrics.h"
#include <thread>
#include "constants.h"
#include "prometheus/detail/utils.h"
#include "triton/common/logging.h"
#ifdef TRITON_ENABLE_METRICS_GPU
#include <cuda_runtime_api.h>
#include <dcgm_agent.h>
#include <cstring>
#include <set>
#include <string>
#endif // TRITON_ENABLE_METRICS_GPU
namespace
triton
{
namespace
core
{
Metrics
::
Metrics
()
:
registry_
(
std
::
make_shared
<
prometheus
::
Registry
>
()),
serializer_
(
new
prometheus
::
TextSerializer
()),
inf_success_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_request_success"
)
.
Help
(
"Number of successful inference requests, all batch sizes"
)
.
Register
(
*
registry_
)),
inf_failure_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_request_failure"
)
.
Help
(
"Number of failed inference requests, all batch sizes"
)
.
Register
(
*
registry_
)),
inf_count_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_count"
)
.
Help
(
"Number of inferences performed (does not "
"include cached requests)"
)
.
Register
(
*
registry_
)),
inf_count_exec_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_exec_count"
)
.
Help
(
"Number of model executions performed "
"(does not include cached requests)"
)
.
Register
(
*
registry_
)),
inf_request_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_request_duration_us"
)
.
Help
(
"Cumulative inference request duration in microseconds "
"(includes cached requests)"
)
.
Register
(
*
registry_
)),
inf_queue_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_queue_duration_us"
)
.
Help
(
"Cumulative inference queuing duration in microseconds "
"(includes cached requests)"
)
.
Register
(
*
registry_
)),
inf_compute_input_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_compute_input_duration_us"
)
.
Help
(
"Cumulative compute input duration in microseconds (does "
"not include cached requests)"
)
.
Register
(
*
registry_
)),
inf_compute_infer_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_compute_infer_duration_us"
)
.
Help
(
"Cumulative compute inference duration in microseconds "
"(does not include cached requests)"
)
.
Register
(
*
registry_
)),
inf_compute_output_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_compute_output_duration_us"
)
.
Help
(
"Cumulative inference compute output duration in "
"microseconds (does not include cached requests)"
)
.
Register
(
*
registry_
)),
cache_num_entries_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_entries"
)
.
Help
(
"Number of responses stored in response cache"
)
.
Register
(
*
registry_
)),
cache_num_lookups_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_lookups"
)
.
Help
(
"Number of cache lookups in response cache"
)
.
Register
(
*
registry_
)),
cache_num_hits_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_hits"
)
.
Help
(
"Number of cache hits in response cache"
)
.
Register
(
*
registry_
)),
cache_num_misses_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_misses"
)
.
Help
(
"Number of cache misses in response cache"
)
.
Register
(
*
registry_
)),
cache_num_evictions_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_evictions"
)
.
Help
(
"Number of cache evictions in response cache"
)
.
Register
(
*
registry_
)),
cache_lookup_duration_us_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_lookup_duration"
)
.
Help
(
"Total cache lookup duration (hit and miss), in microseconds"
)
.
Register
(
*
registry_
)),
cache_insertion_duration_us_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_insertion_duration"
)
.
Help
(
"Total cache insertion duration, in microseconds"
)
.
Register
(
*
registry_
)),
cache_util_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_util"
)
.
Help
(
"Cache utilization [0.0 - 1.0]"
)
.
Register
(
*
registry_
)),
// Per-model cache metric families
cache_num_hits_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_num_hits_per_model"
)
.
Help
(
"Number of cache hits per model"
)
.
Register
(
*
registry_
)),
cache_hit_lookup_duration_us_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_hit_lookup_duration_per_model"
)
.
Help
(
"Total cache hit lookup duration per model, in microseconds"
)
.
Register
(
*
registry_
)),
cache_num_misses_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_num_misses_per_model"
)
.
Help
(
"Number of cache misses per model"
)
.
Register
(
*
registry_
)),
cache_miss_lookup_duration_us_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_miss_lookup_duration_per_model"
)
.
Help
(
"Total cache miss lookup duration per model, in microseconds"
)
.
Register
(
*
registry_
)),
cache_miss_insertion_duration_us_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_miss_insertion_duration_per_model"
)
.
Help
(
"Total cache miss insertion duration per model, in "
"microseconds"
)
.
Register
(
*
registry_
)),
#ifdef TRITON_ENABLE_METRICS_GPU
gpu_utilization_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_utilization"
)
.
Help
(
"GPU utilization rate [0.0 - 1.0)"
)
.
Register
(
*
registry_
)),
gpu_memory_total_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_memory_total_bytes"
)
.
Help
(
"GPU total memory, in bytes"
)
.
Register
(
*
registry_
)),
gpu_memory_used_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_memory_used_bytes"
)
.
Help
(
"GPU used memory, in bytes"
)
.
Register
(
*
registry_
)),
gpu_power_usage_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_power_usage"
)
.
Help
(
"GPU power usage in watts"
)
.
Register
(
*
registry_
)),
gpu_power_limit_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_power_limit"
)
.
Help
(
"GPU power management limit in watts"
)
.
Register
(
*
registry_
)),
gpu_energy_consumption_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_energy_consumption"
)
.
Help
(
"GPU energy consumption in joules since the Triton Server "
"started"
)
.
Register
(
*
registry_
)),
#endif // TRITON_ENABLE_METRICS_GPU
#ifdef TRITON_ENABLE_METRICS_CPU
cpu_utilization_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cpu_utilization"
)
.
Help
(
"CPU utilization rate [0.0 - 1.0]"
)
.
Register
(
*
registry_
)),
cpu_memory_total_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cpu_memory_total_bytes"
)
.
Help
(
"CPU total memory (RAM), in bytes"
)
.
Register
(
*
registry_
)),
cpu_memory_used_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cpu_memory_used_bytes"
)
.
Help
(
"CPU used memory (RAM), in bytes"
)
.
Register
(
*
registry_
)),
#endif // TRITON_ENABLE_METRICS_CPU
metrics_enabled_
(
false
),
gpu_metrics_enabled_
(
false
),
cpu_metrics_enabled_
(
false
),
cache_metrics_enabled_
(
false
),
metrics_interval_ms_
(
2000
)
{
}
static
prometheus
::
detail
::
LabelHasher
label_hasher_
;
size_t
Metrics
::
HashLabels
(
const
std
::
map
<
std
::
string
,
std
::
string
>&
labels
)
{
return
label_hasher_
(
labels
);
}
Metrics
::~
Metrics
()
{
// Signal the cache thread to exit and then wait for it...
if
(
poll_thread_
!=
nullptr
)
{
poll_thread_exit_
.
store
(
true
);
poll_thread_
->
join
();
#ifdef TRITON_ENABLE_METRICS_GPU
if
(
dcgm_metadata_
.
dcgm_initialized_
)
{
dcgmReturn_t
derr
;
// Group destroy will return an error if groupId invalid or dcgm not
// initialized or configured correctly
derr
=
dcgmGroupDestroy
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_metadata_
.
groupId_
);
if
(
derr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Unable to destroy DCGM group: "
<<
errorString
(
derr
);
}
// Stop and shutdown DCGM
if
(
dcgm_metadata_
.
standalone_
)
{
derr
=
dcgmDisconnect
(
dcgm_metadata_
.
dcgm_handle_
);
}
else
{
derr
=
dcgmStopEmbedded
(
dcgm_metadata_
.
dcgm_handle_
);
}
if
(
derr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Unable to stop DCGM: "
<<
errorString
(
derr
);
}
derr
=
dcgmShutdown
();
if
(
derr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Unable to shutdown DCGM: "
<<
errorString
(
derr
);
}
}
#endif // TRITON_ENABLE_METRICS_GPU
}
}
bool
Metrics
::
Enabled
()
{
auto
singleton
=
GetSingleton
();
return
singleton
->
metrics_enabled_
;
}
void
Metrics
::
EnableMetrics
()
{
auto
singleton
=
GetSingleton
();
singleton
->
metrics_enabled_
=
true
;
}
void
Metrics
::
EnableCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe enabling of Cache Metrics
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
metrics_enabling_
);
if
(
singleton
->
cache_metrics_enabled_
)
{
return
;
}
singleton
->
InitializeCacheMetrics
(
response_cache
);
singleton
->
cache_metrics_enabled_
=
true
;
}
void
Metrics
::
EnableGPUMetrics
()
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe enabling of GPU Metrics
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
metrics_enabling_
);
if
(
singleton
->
gpu_metrics_enabled_
)
{
return
;
}
if
(
std
::
getenv
(
"TRITON_SERVER_CPU_ONLY"
)
==
nullptr
)
{
singleton
->
InitializeDcgmMetrics
();
}
singleton
->
gpu_metrics_enabled_
=
true
;
}
void
Metrics
::
EnableCpuMetrics
()
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe enabling of CPU Metrics
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
metrics_enabling_
);
if
(
singleton
->
cpu_metrics_enabled_
)
{
return
;
}
singleton
->
InitializeCpuMetrics
();
singleton
->
cpu_metrics_enabled_
=
true
;
}
void
Metrics
::
SetMetricsInterval
(
uint64_t
metrics_interval_ms
)
{
auto
singleton
=
GetSingleton
();
singleton
->
metrics_interval_ms_
=
metrics_interval_ms
;
}
void
Metrics
::
StartPollingThreadSingleton
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe start of polling thread
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
poll_thread_starting_
);
if
(
singleton
->
poll_thread_started_
)
{
return
;
}
// Start thread for polling cache/dcgm metrics
singleton
->
StartPollingThread
(
response_cache
);
// Toggle flag so this function is only executed once
singleton
->
poll_thread_started_
=
true
;
}
bool
Metrics
::
StartPollingThread
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
// Nothing to poll if no polling metrics enabled, don't spawn a thread
if
(
!
cache_metrics_enabled_
&&
!
gpu_metrics_enabled_
&&
!
cpu_metrics_enabled_
)
{
LOG_WARNING
<<
"No polling metrics (CPU, GPU, Cache) are enabled. Will not "
"poll for them."
;
return
false
;
}
poll_thread_exit_
.
store
(
false
);
// Start a separate thread for polling metrics at specified interval
poll_thread_
.
reset
(
new
std
::
thread
([
this
,
response_cache
]
{
// Thread will update metrics indefinitely until exit flag set
while
(
!
poll_thread_exit_
.
load
())
{
// Sleep for metric interval
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
metrics_interval_ms_
/
2
));
// Poll Response Cache metrics
if
(
cache_metrics_enabled_
&&
response_cache
!=
nullptr
)
{
PollCacheMetrics
(
response_cache
);
}
#ifdef TRITON_ENABLE_METRICS_GPU
// Poll DCGM GPU metrics
if
(
gpu_metrics_enabled_
&&
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
()
>
0
)
{
PollDcgmMetrics
();
}
#endif // TRITON_ENABLE_METRICS_GPU
#ifdef TRITON_ENABLE_METRICS_CPU
if
(
cpu_metrics_enabled_
)
{
PollCpuMetrics
();
}
#endif // TRITON_ENABLE_METRICS_CPU
}
}));
return
true
;
}
bool
Metrics
::
PollCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
if
(
response_cache
==
nullptr
)
{
LOG_WARNING
<<
"error polling cache metrics, cache metrics will not be "
<<
"available: cache was nullptr"
;
return
false
;
}
// Update global cache metrics
cache_num_entries_global_
->
Set
(
response_cache
->
NumEntries
());
cache_num_lookups_global_
->
Set
(
response_cache
->
NumLookups
());
cache_num_hits_global_
->
Set
(
response_cache
->
NumHits
());
cache_num_misses_global_
->
Set
(
response_cache
->
NumMisses
());
cache_num_evictions_global_
->
Set
(
response_cache
->
NumEvictions
());
cache_lookup_duration_us_global_
->
Set
(
response_cache
->
TotalLookupLatencyNs
()
/
1000
);
cache_insertion_duration_us_global_
->
Set
(
response_cache
->
TotalInsertionLatencyNs
()
/
1000
);
cache_util_global_
->
Set
(
response_cache
->
TotalUtilization
());
return
true
;
}
#ifdef TRITON_ENABLE_METRICS_CPU
Status
Metrics::ParseCpuInfo(CpuInfo& info)
{
#ifdef _WIN32
  return Status(
      Status::Code::INTERNAL, "CPU metrics not supported on Windows.");
#else
  std::ifstream ifs("/proc/stat");
  if (!ifs.good()) {
    return Status(Status::Code::INTERNAL, "Failed to open /proc/stat.");
  }

  std::string line;
  // Verify first line is aggregate cpu line
  std::getline(ifs, line);
  if (line.rfind("cpu ", 0) == std::string::npos) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to find aggregate CPU info in /proc/stat.");
  }

  std::string _;
  std::istringstream iss(line);
  // Use _ to skip "cpu" at start of line
  if (!(iss >> _ >> info)) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to parse aggregate CPU info in /proc/stat.");
  }

  return Status::Success;
#endif  // OS
}

Status
Metrics::ParseMemInfo(MemInfo& info)
{
#ifdef _WIN32
  return Status(
      Status::Code::INTERNAL, "Memory metrics not supported on Windows.");
#else
  std::ifstream ifs("/proc/meminfo");
  if (!ifs.good()) {
    return Status(Status::Code::INTERNAL, "Failed to open /proc/meminfo.");
  }

  std::string line;
  constexpr uint64_t KB = 1024;
  while (std::getline(ifs, line)) {
    std::istringstream iss(line);
    std::string name;
    uint64_t value = 0;
    if (iss >> name >> value) {
      name.pop_back();
      info[name] = value * KB;
    } else {
      return Status(
          Status::Code::INTERNAL, "Encountered error parsing /proc/meminfo.");
    }
  }

  if (info.find("MemTotal") == info.end() ||
      info.find("MemAvailable") == info.end()) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to find desired values in /proc/meminfo.");
  }

  if (info["MemAvailable"] > info["MemTotal"]) {
    return Status(
        Status::Code::INTERNAL,
        "Available bytes shouldn't be greater than Total bytes");
  }

  // "Used" memory can be defined in many different ways. While many
  // older applications consider "used = total - (free + cached)", a more
  // accurate measure of available memory "MemAvailable" was added,
  // so we choose "used = total - available" for a more accurate measure.
  // This may change in the future if not sufficient for most use cases.
  // See https://stackoverflow.com/a/35019697.
  info["MemUsed"] = info["MemTotal"] - info["MemAvailable"];

  return Status::Success;
#endif  // OS
}
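The "used = MemTotal - MemAvailable" choice documented above can be reproduced outside the server with a few lines. A standalone illustration of the same parsing and arithmetic (the program itself is not part of the deleted sources; /proc/meminfo reports kB, hence the * 1024):

#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

int main()
{
  std::unordered_map<std::string, uint64_t> info;
  std::ifstream ifs("/proc/meminfo");
  std::string line;
  while (std::getline(ifs, line)) {
    std::istringstream iss(line);
    std::string name;
    uint64_t kb = 0;
    if (iss >> name >> kb) {
      name.pop_back();         // strip trailing ':' (e.g. "MemTotal:")
      info[name] = kb * 1024;  // convert reported kB to bytes
    }
  }
  // Same definition of "used" as ParseMemInfo() above.
  std::cout << "used bytes: "
            << (info["MemTotal"] - info["MemAvailable"]) << "\n";
  return 0;
}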
double
Metrics::CpuUtilization(const CpuInfo& info_new, const CpuInfo& info_old)
{
  // Account for overflow
  const auto wrap_sub = [](uint64_t a, uint64_t b) {
    return (a > b) ? (a - b) : 0;
  };

  uint64_t util_diff = wrap_sub(info_new.user, info_old.user) +
                       wrap_sub(info_new.nice, info_old.nice) +
                       wrap_sub(info_new.system, info_old.system) +
                       wrap_sub(info_new.irq, info_old.irq) +
                       wrap_sub(info_new.softirq, info_old.softirq) +
                       wrap_sub(info_new.steal, info_old.steal);
  uint64_t idle_diff = wrap_sub(info_new.idle, info_old.idle) +
                       wrap_sub(info_new.iowait, info_old.iowait);

  double util_ratio = static_cast<double>(util_diff) / (util_diff + idle_diff);
  return util_ratio;
}
#endif  // TRITON_ENABLE_METRICS_CPU
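In other words, utilization over a polling window is the busy-tick delta divided by the busy-plus-idle delta between two /proc/stat samples. A small numeric check of that formula, with made-up sample values (the two samples and the helper program are assumptions for illustration only):

#include <cassert>

int main()
{
  // old sample: user=1000 nice=0 system=500 idle=8000 iowait=500 irq=0 softirq=0 steal=0
  // new sample: user=1300 nice=0 system=600 idle=8550 iowait=550 irq=0 softirq=0 steal=0
  const double util_diff = (1300 - 1000) + (600 - 500);  // 400 busy ticks
  const double idle_diff = (8550 - 8000) + (550 - 500);  // 600 idle ticks
  const double util_ratio = util_diff / (util_diff + idle_diff);
  assert(util_ratio > 0.399 && util_ratio < 0.401);  // ~40% busy in the window
  return 0;
}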
bool
Metrics::PollCpuMetrics()
{
#ifndef TRITON_ENABLE_METRICS_CPU
  return false;
#else
  // CPU Utilization
  double cpu_util = 0.0;
  auto cpu_info = CpuInfo();
  auto status = ParseCpuInfo(cpu_info);
  if (status.IsOk()) {
    cpu_util = CpuUtilization(cpu_info, last_cpu_info_);
    last_cpu_info_ = cpu_info;
  }
  cpu_utilization_->Set(cpu_util);  // [0.0, 1.0]

  // RAM / Memory
  double mem_total_bytes = 0.0;
  double mem_used_bytes = 0.0;
  auto mem_info = MemInfo();
  status = ParseMemInfo(mem_info);
  if (status.IsOk()) {
    // MemTotal will usually not change over time, but if something
    // goes wrong when querying memory, we can reflect that by updating.
    mem_total_bytes = mem_info["MemTotal"];
    mem_used_bytes = mem_info["MemUsed"];
  }
  cpu_memory_total_->Set(mem_total_bytes);
  cpu_memory_used_->Set(mem_used_bytes);

  return true;
#endif  // TRITON_ENABLE_METRICS_CPU
}
bool
Metrics
::
PollDcgmMetrics
()
{
#ifndef TRITON_ENABLE_METRICS_GPU
return
false
;
#else
if
(
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
()
==
0
)
{
LOG_WARNING
<<
"error polling GPU metrics, GPU metrics will not be "
<<
"available: no available gpus to poll"
;
return
false
;
}
dcgmUpdateAllFields
(
dcgm_metadata_
.
dcgm_handle_
,
1
/* wait for update*/
);
for
(
unsigned
int
didx
=
0
;
didx
<
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
();
++
didx
)
{
uint32_t
cuda_id
=
dcgm_metadata_
.
available_cuda_gpu_ids_
[
didx
];
if
(
dcgm_metadata_
.
cuda_ids_to_dcgm_ids_
.
count
(
cuda_id
)
<=
0
)
{
LOG_WARNING
<<
"Cannot find DCGM id for CUDA id "
<<
cuda_id
;
continue
;
}
uint32_t
dcgm_id
=
dcgm_metadata_
.
cuda_ids_to_dcgm_ids_
.
at
(
cuda_id
);
dcgmFieldValue_v1
field_values
[
dcgm_metadata_
.
field_count_
];
dcgmReturn_t
dcgmerr
=
dcgmGetLatestValuesForFields
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_id
,
dcgm_metadata_
.
fields_
.
data
(),
dcgm_metadata_
.
field_count_
,
field_values
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
++
;
LOG_WARNING
<<
"Unable to get field values for GPU ID "
<<
cuda_id
<<
": "
<<
errorString
(
dcgmerr
);
}
else
{
// Power limit
if
(
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
double
power_limit
=
field_values
[
0
].
value
.
dbl
;
if
((
field_values
[
0
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_FP64_IS_BLANK
(
power_limit
)))
{
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
=
0
;
}
else
{
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
++
;
power_limit
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
0
].
status
);
LOG_WARNING
<<
"Unable to get power limit for GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
power_limit
);
}
gpu_power_limit_
[
didx
]
->
Set
(
power_limit
);
}
// Power usage
if
(
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
double
power_usage
=
field_values
[
1
].
value
.
dbl
;
if
((
field_values
[
1
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_FP64_IS_BLANK
(
power_usage
)))
{
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
=
0
;
}
else
{
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
++
;
power_usage
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
1
].
status
);
LOG_WARNING
<<
"Unable to get power usage for GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
power_usage
);
}
gpu_power_usage_
[
didx
]
->
Set
(
power_usage
);
}
// Energy Consumption
if
(
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
int64_t
energy
=
field_values
[
2
].
value
.
i64
;
if
((
field_values
[
2
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
energy
)))
{
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
=
0
;
if
(
dcgm_metadata_
.
last_energy_
[
didx
]
==
0
)
{
dcgm_metadata_
.
last_energy_
[
didx
]
=
energy
;
}
gpu_energy_consumption_
[
didx
]
->
Increment
(
(
double
)(
energy
-
dcgm_metadata_
.
last_energy_
[
didx
])
*
0.001
);
dcgm_metadata_
.
last_energy_
[
didx
]
=
energy
;
}
else
{
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
++
;
energy
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
2
].
status
);
LOG_WARNING
<<
"Unable to get energy consumption for "
<<
"GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
energy
);
}
}
// Utilization
if
(
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
int64_t
util
=
field_values
[
3
].
value
.
i64
;
if
((
field_values
[
3
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
util
)))
{
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
=
0
;
}
else
{
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
++
;
util
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
3
].
status
);
LOG_WARNING
<<
"Unable to get GPU utilization for GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
util
);
}
gpu_utilization_
[
didx
]
->
Set
((
double
)
util
*
0.01
);
}
// Memory Usage
if
(
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
int64_t
memory_used
=
field_values
[
4
].
value
.
i64
;
int64_t
memory_total
=
field_values
[
5
].
value
.
i64
;
if
((
field_values
[
4
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
memory_used
))
&&
(
field_values
[
5
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
memory_total
)))
{
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
=
0
;
}
else
{
memory_total
=
0
;
memory_used
=
0
;
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
++
;
dcgmReturn_t
usageStatus
=
dcgmReturn_t
(
field_values
[
4
].
status
);
dcgmReturn_t
memoryTotaltatus
=
dcgmReturn_t
(
field_values
[
5
].
status
);
LOG_WARNING
<<
"Unable to get memory usage for GPU "
<<
cuda_id
<<
". Memory usage status:"
<<
errorString
(
usageStatus
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
memory_used
)
<<
". Memory total status:"
<<
errorString
(
memoryTotaltatus
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
memory_total
);
}
gpu_memory_total_
[
didx
]
->
Set
(
memory_total
*
1024
*
1024
);
// bytes
gpu_memory_used_
[
didx
]
->
Set
(
memory_used
*
1024
*
1024
);
// bytes
}
}
}
return
true
;
#endif // TRITON_ENABLE_METRICS_GPU
}
bool
Metrics
::
InitializeCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
if
(
response_cache
==
nullptr
)
{
LOG_WARNING
<<
"error initializing cache metrics, cache metrics will not be "
<<
"available: cache was nullptr"
;
return
false
;
}
const
std
::
map
<
std
::
string
,
std
::
string
>
cache_labels
;
cache_num_entries_global_
=
&
cache_num_entries_family_
.
Add
(
cache_labels
);
cache_num_lookups_global_
=
&
cache_num_lookups_family_
.
Add
(
cache_labels
);
cache_num_hits_global_
=
&
cache_num_hits_family_
.
Add
(
cache_labels
);
cache_num_misses_global_
=
&
cache_num_misses_family_
.
Add
(
cache_labels
);
cache_num_evictions_global_
=
&
cache_num_evictions_family_
.
Add
(
cache_labels
);
cache_lookup_duration_us_global_
=
&
cache_lookup_duration_us_family_
.
Add
(
cache_labels
);
cache_insertion_duration_us_global_
=
&
cache_insertion_duration_us_family_
.
Add
(
cache_labels
);
cache_util_global_
=
&
cache_util_family_
.
Add
(
cache_labels
);
LOG_INFO
<<
"Collecting Response Cache metrics"
;
return
true
;
}
bool
Metrics
::
InitializeCpuMetrics
()
{
#ifndef TRITON_ENABLE_METRICS_CPU
return
false
;
#else
const
std
::
map
<
std
::
string
,
std
::
string
>
cpu_labels
;
cpu_utilization_
=
&
cpu_utilization_family_
.
Add
(
cpu_labels
);
cpu_memory_total_
=
&
cpu_memory_total_family_
.
Add
(
cpu_labels
);
cpu_memory_used_
=
&
cpu_memory_used_family_
.
Add
(
cpu_labels
);
// Get baseline CPU info for future comparisons
last_cpu_info_
=
CpuInfo
();
auto
status
=
ParseCpuInfo
(
last_cpu_info_
);
if
(
!
status
.
IsOk
())
{
LOG_WARNING
<<
"error initializing CPU metrics, CPU utilization may not "
"be available: "
<<
status
.
Message
();
return
false
;
}
// Verify memory metrics can be parsed
auto
mem_info
=
MemInfo
();
status
=
ParseMemInfo
(
mem_info
);
if
(
!
status
.
IsOk
())
{
LOG_WARNING
<<
"error initializing CPU metrics, CPU memory metrics may not "
"be available: "
<<
status
.
Message
();
return
false
;
}
LOG_INFO
<<
"Collecting CPU metrics"
;
return
true
;
#endif // TRITON_ENABLE_METRICS_CPU
}
bool
Metrics
::
InitializeDcgmMetrics
()
{
#ifndef TRITON_ENABLE_METRICS_GPU
return
false
;
#else
dcgmReturn_t
dcgmerr
=
dcgmInit
();
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"error initializing DCGM, GPU metrics will not be "
<<
"available: "
<<
errorString
(
dcgmerr
);
return
false
;
}
if
(
dcgm_metadata_
.
standalone_
)
{
char
hostIpAddress
[
16
]
=
{
0
};
std
::
string
ipAddress
=
"127.0.0.1"
;
strncpy
(
hostIpAddress
,
ipAddress
.
c_str
(),
15
);
dcgmerr
=
dcgmConnect
(
hostIpAddress
,
&
dcgm_metadata_
.
dcgm_handle_
);
}
else
{
dcgmerr
=
dcgmStartEmbedded
(
DCGM_OPERATION_MODE_MANUAL
,
&
dcgm_metadata_
.
dcgm_handle_
);
}
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to start: "
<<
errorString
(
dcgmerr
);
return
false
;
}
else
{
// Set this flag to signal DCGM cleanup in destructor
dcgm_metadata_
.
dcgm_initialized_
=
true
;
}
if
(
dcgm_metadata_
.
standalone_
)
{
dcgmerr
=
dcgmUpdateAllFields
(
dcgm_metadata_
.
dcgm_handle_
,
1
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to update all fields, GPU metrics will "
"not be available: "
<<
errorString
(
dcgmerr
);
return
false
;
}
}
unsigned
int
dcgm_gpu_ids
[
DCGM_MAX_NUM_DEVICES
];
int
dcgm_gpu_count
;
dcgmerr
=
dcgmGetAllDevices
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_gpu_ids
,
&
dcgm_gpu_count
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to get device info and count, GPU "
"metrics will not be available: "
<<
errorString
(
dcgmerr
);
return
false
;
}
// Get PCI Bus ID to DCGM device Id map.
// Some devices may have problems using DCGM API and
// these devices needs to be ignored.
std
::
map
<
std
::
string
,
size_t
>
pci_bus_id_to_dcgm_id
;
std
::
map
<
std
::
string
,
std
::
map
<
std
::
string
,
std
::
string
>
>
pci_bus_id_to_gpu_labels
;
std
::
map
<
std
::
string
,
std
::
string
>
pci_bus_id_to_device_name
;
dcgmDeviceAttributes_t
gpu_attributes
[
DCGM_MAX_NUM_DEVICES
];
for
(
int
i
=
0
;
i
<
dcgm_gpu_count
;
i
++
)
{
gpu_attributes
[
i
].
version
=
dcgmDeviceAttributes_version
;
dcgmerr
=
dcgmGetDeviceAttributes
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_gpu_ids
[
i
],
&
gpu_attributes
[
i
]);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to get device properties for DCGM device "
<<
dcgm_gpu_ids
[
i
]
<<
", GPU metrics will not be available for this device: "
<<
errorString
(
dcgmerr
);
}
else
{
std
::
string
pciBusId
=
gpu_attributes
[
i
].
identifiers
.
pciBusId
;
pci_bus_id_to_dcgm_id
[
pciBusId
]
=
i
;
pci_bus_id_to_device_name
[
pciBusId
]
=
std
::
string
(
gpu_attributes
[
i
].
identifiers
.
deviceName
);
std
::
map
<
std
::
string
,
std
::
string
>
gpu_labels
;
gpu_labels
.
insert
(
std
::
map
<
std
::
string
,
std
::
string
>::
value_type
(
kMetricsLabelGpuUuid
,
std
::
string
(
gpu_attributes
[
i
].
identifiers
.
uuid
)));
pci_bus_id_to_gpu_labels
[
pciBusId
]
=
gpu_labels
;
}
}
// Get CUDA-visible PCI Bus Ids and get DCGM metrics for each CUDA-visible GPU
int
cuda_gpu_count
;
cudaError_t
cudaerr
=
cudaGetDeviceCount
(
&
cuda_gpu_count
);
if
(
cudaerr
!=
cudaSuccess
)
{
LOG_WARNING
<<
"Cannot get CUDA device count, GPU metrics will not be available"
;
return
false
;
}
for
(
int
i
=
0
;
i
<
cuda_gpu_count
;
++
i
)
{
std
::
string
pci_bus_id
=
"0000"
;
// pad 0's for uniformity
char
pcibusid_str
[
64
];
cudaerr
=
cudaDeviceGetPCIBusId
(
pcibusid_str
,
sizeof
(
pcibusid_str
)
-
1
,
i
);
if
(
cudaerr
==
cudaSuccess
)
{
pci_bus_id
.
append
(
pcibusid_str
);
if
(
pci_bus_id_to_dcgm_id
.
count
(
pci_bus_id
)
<=
0
)
{
LOG_INFO
<<
"Skipping GPU:"
<<
i
<<
" since it's not CUDA enabled. This should never happen!"
;
continue
;
}
// Filter out CUDA visible GPUs from GPUs found by DCGM
LOG_INFO
<<
"Collecting metrics for GPU "
<<
i
<<
": "
<<
pci_bus_id_to_device_name
[
pci_bus_id
];
auto
&
gpu_labels
=
pci_bus_id_to_gpu_labels
[
pci_bus_id
];
gpu_utilization_
.
push_back
(
&
gpu_utilization_family_
.
Add
(
gpu_labels
));
gpu_memory_total_
.
push_back
(
&
gpu_memory_total_family_
.
Add
(
gpu_labels
));
gpu_memory_used_
.
push_back
(
&
gpu_memory_used_family_
.
Add
(
gpu_labels
));
gpu_power_usage_
.
push_back
(
&
gpu_power_usage_family_
.
Add
(
gpu_labels
));
gpu_power_limit_
.
push_back
(
&
gpu_power_limit_family_
.
Add
(
gpu_labels
));
gpu_energy_consumption_
.
push_back
(
&
gpu_energy_consumption_family_
.
Add
(
gpu_labels
));
uint32_t
dcgm_id
=
pci_bus_id_to_dcgm_id
[
pci_bus_id
];
dcgm_metadata_
.
cuda_ids_to_dcgm_ids_
[
i
]
=
dcgm_id
;
dcgm_metadata_
.
available_cuda_gpu_ids_
.
emplace_back
(
i
);
}
else
{
LOG_WARNING
<<
"GPU metrics will not be available for device:"
<<
i
;
}
}
// create a gpu group
char
groupName
[]
=
"dcgm_group"
;
dcgmerr
=
dcgmGroupCreate
(
dcgm_metadata_
.
dcgm_handle_
,
DCGM_GROUP_DEFAULT
,
groupName
,
&
dcgm_metadata_
.
groupId_
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Cannot make GPU group: "
<<
errorString
(
dcgmerr
);
}
// Initialize tracking vectors
for
(
unsigned
int
didx
=
0
;
didx
<
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
();
++
didx
)
{
dcgm_metadata_
.
power_limit_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
power_usage_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
energy_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
util_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
mem_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
last_energy_
.
push_back
(
0
);
}
// Number of fields for DCGM to use from fields_ below
dcgm_metadata_
.
field_count_
=
6
;
unsigned
short
util_flag
=
dcgm_metadata_
.
standalone_
?
DCGM_FI_PROF_GR_ENGINE_ACTIVE
:
DCGM_FI_DEV_GPU_UTIL
;
dcgm_metadata_
.
fields_
=
{
DCGM_FI_DEV_POWER_MGMT_LIMIT
,
// power limit, watts
DCGM_FI_DEV_POWER_USAGE
,
// power usage, watts
DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION
,
// Total energy consumption, mJ
util_flag
,
// util ratio, 1 = 1%
DCGM_FI_DEV_FB_USED
,
// Frame buffer used, MiB
DCGM_FI_DEV_FB_TOTAL
,
// Frame buffer used, MiB
};
char
fieldName
[]
=
"field_group"
;
dcgmFieldGrp_t
fieldGroupId
;
dcgmerr
=
dcgmFieldGroupCreate
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_metadata_
.
field_count_
,
dcgm_metadata_
.
fields_
.
data
(),
fieldName
,
&
fieldGroupId
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Cannot make field group: "
<<
errorString
(
dcgmerr
);
}
dcgmerr
=
dcgmWatchFields
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_metadata_
.
groupId_
,
fieldGroupId
,
metrics_interval_ms_
*
1000
/*update period, usec*/
,
5.0
/*maxKeepAge, sec*/
,
5
/*maxKeepSamples*/
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Cannot start watching fields: "
<<
errorString
(
dcgmerr
);
return
false
;
}
return
true
;
#endif // TRITON_ENABLE_METRICS_GPU
}
#ifdef TRITON_ENABLE_METRICS_GPU
std
::
string
Metrics
::
dcgmValueToErrorMessage
(
double
val
)
{
if
(
DCGM_FP64_IS_BLANK
(
val
))
{
if
(
val
==
DCGM_FP64_BLANK
)
{
return
"Not Specified"
;
}
else
if
(
val
==
DCGM_FP64_NOT_FOUND
)
{
return
"Not Found"
;
}
else
if
(
val
==
DCGM_FP64_NOT_SUPPORTED
)
{
return
"Not Supported"
;
}
else
if
(
val
==
DCGM_FP64_NOT_PERMISSIONED
)
{
return
"Insf. Permission"
;
}
else
{
return
"Unknown"
;
}
}
else
{
return
std
::
to_string
(
val
);
}
}
std
::
string
Metrics
::
dcgmValueToErrorMessage
(
int64_t
val
)
{
if
(
DCGM_INT64_IS_BLANK
(
val
))
{
switch
(
val
)
{
case
DCGM_INT64_BLANK
:
return
"Not Specified"
;
case
DCGM_INT64_NOT_FOUND
:
return
"Not Found"
;
case
DCGM_INT64_NOT_SUPPORTED
:
return
"Not Supported"
;
case
DCGM_INT64_NOT_PERMISSIONED
:
return
"Insf. Permission"
;
default:
return
"Unknown"
;
}
}
else
{
return
std
::
to_string
(
val
);
}
}
#endif // TRITON_ENABLE_METRICS_GPU
bool
Metrics
::
UUIDForCudaDevice
(
int
cuda_device
,
std
::
string
*
uuid
)
{
// If metrics were not initialized then just silently fail since
// with DCGM we can't get the CUDA device (and not worth doing
// anyway since metrics aren't being reported).
auto
singleton
=
GetSingleton
();
if
(
!
singleton
->
gpu_metrics_enabled_
)
{
return
false
;
}
// If GPU metrics is not enabled just silently fail.
#ifndef TRITON_ENABLE_METRICS_GPU
return
false
;
#else
dcgmDeviceAttributes_t
gpu_attributes
;
gpu_attributes
.
version
=
dcgmDeviceAttributes_version
;
dcgmReturn_t
dcgmerr
=
dcgmGetDeviceAttributes
(
singleton
->
dcgm_metadata_
.
dcgm_handle_
,
cuda_device
,
&
gpu_attributes
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_ERROR
<<
"Unable to get device UUID: "
<<
errorString
(
dcgmerr
);
return
false
;
}
*
uuid
=
gpu_attributes
.
identifiers
.
uuid
;
return
true
;
#endif // TRITON_ENABLE_METRICS_GPU
}
std
::
shared_ptr
<
prometheus
::
Registry
>
Metrics
::
GetRegistry
()
{
auto
singleton
=
Metrics
::
GetSingleton
();
return
singleton
->
registry_
;
}
const
std
::
string
Metrics
::
SerializedMetrics
()
{
auto
singleton
=
Metrics
::
GetSingleton
();
return
singleton
->
serializer_
->
Serialize
(
singleton
->
registry_
.
get
()
->
Collect
());
}
Metrics
*
Metrics
::
GetSingleton
()
{
static
Metrics
singleton
;
return
&
singleton
;
}
}}
// namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metrics.h  deleted 100644 → 0  @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#ifdef TRITON_ENABLE_METRICS
#include <atomic>
#include <mutex>
#include <thread>
#include "prometheus/counter.h"
#include "prometheus/gauge.h"
#include "prometheus/registry.h"
#include "prometheus/serializer.h"
#include "prometheus/text_serializer.h"
#include "response_cache.h"
#ifdef TRITON_ENABLE_METRICS_GPU
#include <dcgm_agent.h>
#endif // TRITON_ENABLE_METRICS_GPU
namespace
triton
{
namespace
core
{
#ifdef TRITON_ENABLE_METRICS_CPU
using
MemInfo
=
std
::
unordered_map
<
std
::
string
,
uint64_t
>
;
// References:
// - htop source: https://stackoverflow.com/a/23376195
// - Linux docs: https://www.kernel.org/doc/Documentation/filesystems/proc.txt
// guest/guestnice values are counted in user/nice so we skip parsing them
struct
CpuInfo
{
uint64_t
user
=
0
;
// normal processes executing in user mode
uint64_t
nice
=
0
;
// niced processes executing in user mode
uint64_t
system
=
0
;
// processes executing in kernel mode
uint64_t
idle
=
0
;
// twiddling thumbs
uint64_t
iowait
=
0
;
// waiting for I/O to complete
uint64_t
irq
=
0
;
// servicing interrupts
uint64_t
softirq
=
0
;
// servicing softirqs
uint64_t
steal
=
0
;
// involuntary wait
};
inline
std
::
istream
&
operator
>>
(
std
::
istream
&
is
,
CpuInfo
&
info
)
{
is
>>
info
.
user
>>
info
.
nice
>>
info
.
system
>>
info
.
idle
>>
info
.
iowait
>>
info
.
irq
>>
info
.
softirq
>>
info
.
steal
;
return
is
;
}
#endif // TRITON_ENABLE_METRICS_CPU
#ifdef TRITON_ENABLE_METRICS_GPU
struct
DcgmMetadata
{
// DCGM handles for initialization and destruction
dcgmHandle_t
dcgm_handle_
=
0
;
dcgmGpuGrp_t
groupId_
=
0
;
// DCGM Flags
bool
standalone_
=
false
;
// DCGM Fields
size_t
field_count_
=
0
;
std
::
vector
<
unsigned
short
>
fields_
;
// GPU Device Mapping
std
::
map
<
uint32_t
,
uint32_t
>
cuda_ids_to_dcgm_ids_
;
std
::
vector
<
uint32_t
>
available_cuda_gpu_ids_
;
// Stop attempting metrics if they fail multiple consecutive
// times for a device.
const
int
fail_threshold_
=
3
;
// DCGM Failure Tracking
std
::
vector
<
int
>
power_limit_fail_cnt_
;
std
::
vector
<
int
>
power_usage_fail_cnt_
;
std
::
vector
<
int
>
energy_fail_cnt_
;
std
::
vector
<
int
>
util_fail_cnt_
;
std
::
vector
<
int
>
mem_fail_cnt_
;
// DCGM Energy Tracking
std
::
vector
<
unsigned
long
long
>
last_energy_
;
// Track if DCGM handle initialized successfully
bool
dcgm_initialized_
=
false
;
};
#endif // TRITON_ENABLE_METRICS_GPU
class
Metrics
{
public:
// Return the hash value of the labels
static
size_t
HashLabels
(
const
std
::
map
<
std
::
string
,
std
::
string
>&
labels
);
// Are metrics enabled?
static
bool
Enabled
();
// Enable reporting of metrics
static
void
EnableMetrics
();
// Enable reporting of GPU metrics
static
void
EnableGPUMetrics
();
// Enable reporting of CPU metrics
static
void
EnableCpuMetrics
();
// Enable reporting of Cache metrics
static
void
EnableCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
// Start a thread for polling enabled metrics if any
static
void
StartPollingThreadSingleton
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
// Set the time interval in secs at which metrics are collected
static
void
SetMetricsInterval
(
uint64_t
metrics_interval_ms
);
// Get the prometheus registry
static
std
::
shared_ptr
<
prometheus
::
Registry
>
GetRegistry
();
// Get serialized metrics
static
const
std
::
string
SerializedMetrics
();
// Get the UUID for a CUDA device. Return true and initialize 'uuid'
// if a UUID is found, return false if a UUID cannot be returned.
static
bool
UUIDForCudaDevice
(
int
cuda_device
,
std
::
string
*
uuid
);
// Metric family counting successful inference requests
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceSuccess
()
{
return
GetSingleton
()
->
inf_success_family_
;
}
// Metric family counting failed inference requests
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceFailure
()
{
return
GetSingleton
()
->
inf_failure_family_
;
}
// Metric family counting inferences performed, where a batch-size
// 'n' inference request is counted as 'n' inferences
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceCount
()
{
return
GetSingleton
()
->
inf_count_family_
;
}
// Metric family counting inferences performed, where a batch-size
// 'n' inference request is counted as 'n' inferences
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceExecutionCount
()
{
return
GetSingleton
()
->
inf_count_exec_family_
;
}
// Metric family of cumulative inference request duration, in
// microseconds
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceRequestDuration
()
{
return
GetSingleton
()
->
inf_request_duration_us_family_
;
}
// Metric family of cumulative inference queuing duration, in
// microseconds
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceQueueDuration
()
{
return
GetSingleton
()
->
inf_queue_duration_us_family_
;
}
// Metric family of cumulative inference compute durations, in
// microseconds
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceComputeInputDuration
()
{
return
GetSingleton
()
->
inf_compute_input_duration_us_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceComputeInferDuration
()
{
return
GetSingleton
()
->
inf_compute_infer_duration_us_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceComputeOutputDuration
()
{
return
GetSingleton
()
->
inf_compute_output_duration_us_family_
;
}
// Metric families of per-model response cache metrics
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheHitCount
()
{
return
GetSingleton
()
->
cache_num_hits_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheHitLookupDuration
()
{
return
GetSingleton
()
->
cache_hit_lookup_duration_us_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheMissCount
()
{
return
GetSingleton
()
->
cache_num_misses_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheMissLookupDuration
()
{
return
GetSingleton
()
->
cache_miss_lookup_duration_us_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheMissInsertionDuration
()
{
return
GetSingleton
()
->
cache_miss_insertion_duration_us_model_family_
;
}
private:
Metrics
();
virtual
~
Metrics
();
static
Metrics
*
GetSingleton
();
bool
InitializeDcgmMetrics
();
bool
InitializeCpuMetrics
();
bool
InitializeCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
bool
StartPollingThread
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
bool
PollCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
bool
PollDcgmMetrics
();
bool
PollCpuMetrics
();
std
::
string
dcgmValueToErrorMessage
(
double
val
);
std
::
string
dcgmValueToErrorMessage
(
int64_t
val
);
std
::
shared_ptr
<
prometheus
::
Registry
>
registry_
;
std
::
unique_ptr
<
prometheus
::
Serializer
>
serializer_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_success_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_failure_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_count_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_count_exec_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_request_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_queue_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_compute_input_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_compute_infer_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_compute_output_duration_us_family_
;
// Global Response Cache metrics
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_entries_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_lookups_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_hits_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_misses_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_evictions_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_lookup_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_insertion_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_util_family_
;
// Gauges for Global Response Cache metrics
prometheus
::
Gauge
*
cache_num_entries_global_
;
prometheus
::
Gauge
*
cache_num_lookups_global_
;
prometheus
::
Gauge
*
cache_num_hits_global_
;
prometheus
::
Gauge
*
cache_num_misses_global_
;
prometheus
::
Gauge
*
cache_num_evictions_global_
;
prometheus
::
Gauge
*
cache_lookup_duration_us_global_
;
prometheus
::
Gauge
*
cache_insertion_duration_us_global_
;
prometheus
::
Gauge
*
cache_util_global_
;
// Per-model Response Cache metrics
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_num_hits_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_hit_lookup_duration_us_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_num_misses_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_miss_lookup_duration_us_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_miss_insertion_duration_us_model_family_
;
#ifdef TRITON_ENABLE_METRICS_GPU
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_utilization_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_memory_total_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_memory_used_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_power_usage_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_power_limit_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
gpu_energy_consumption_family_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_utilization_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_memory_total_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_memory_used_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_power_usage_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_power_limit_
;
std
::
vector
<
prometheus
::
Counter
*>
gpu_energy_consumption_
;
DcgmMetadata
dcgm_metadata_
;
#endif // TRITON_ENABLE_METRICS_GPU
#ifdef TRITON_ENABLE_METRICS_CPU
// Parses "/proc/meminfo" for metrics, currently only supported on Linux.
Status
ParseMemInfo
(
MemInfo
&
info
);
// Parses "/proc/stat" for metrics, currently only supported on Linux.
Status
ParseCpuInfo
(
CpuInfo
&
info
);
// Computes CPU utilization between "info_new" and "info_old" values
double
CpuUtilization
(
const
CpuInfo
&
info_new
,
const
CpuInfo
&
info_old
);
prometheus
::
Family
<
prometheus
::
Gauge
>&
cpu_utilization_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cpu_memory_total_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cpu_memory_used_family_
;
prometheus
::
Gauge
*
cpu_utilization_
;
prometheus
::
Gauge
*
cpu_memory_total_
;
prometheus
::
Gauge
*
cpu_memory_used_
;
CpuInfo
last_cpu_info_
;
#endif // TRITON_ENABLE_METRICS_CPU
// Thread for polling cache/gpu metrics periodically
std
::
unique_ptr
<
std
::
thread
>
poll_thread_
;
std
::
atomic
<
bool
>
poll_thread_exit_
;
bool
metrics_enabled_
;
bool
gpu_metrics_enabled_
;
bool
cpu_metrics_enabled_
;
bool
cache_metrics_enabled_
;
bool
poll_thread_started_
;
std
::
mutex
metrics_enabling_
;
std
::
mutex
poll_thread_starting_
;
uint64_t
metrics_interval_ms_
;
};
}}
// namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/model.cc  deleted 100644 → 0  @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model.h"
#include <chrono>
#include <future>
#include "constants.h"
#include "filesystem.h"
#include "infer_request.h"
#include "model_config_utils.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

Status
Model::GetInput(
    const std::string& name, const inference::ModelInput** input) const
{
  const auto itr = input_map_.find(name);
  if (itr == input_map_.end()) {
    return Status(
        Status::Code::INVALID_ARG, "unexpected inference input '" + name +
                                       "' for model '" + Name() + "'");
  }

  *input = &itr->second;
  return Status::Success;
}

Status
Model::GetOutput(
    const std::string& name, const inference::ModelOutput** output) const
{
  const auto itr = output_map_.find(name);
  if (itr == output_map_.end()) {
    return Status(
        Status::Code::INVALID_ARG, "unexpected inference output '" + name +
                                       "' for model '" + Name() + "'");
  }

  *output = &itr->second;
  return Status::Success;
}

Status
Model::SetModelConfig(const inference::ModelConfig& config)
{
  config_ = config;
  set_model_config_ = true;
  return Status::Success;
}

Status
Model::SetScheduler(std::unique_ptr<Scheduler> scheduler)
{
  if (scheduler_ != nullptr) {
    return Status(
        Status::Code::INTERNAL, "Attempt to change scheduler not allowed");
  }

  scheduler_ = std::move(scheduler);
  return Status::Success;
}

Status
Model::Init(const bool is_config_provided)
{
  if (!set_model_config_ && !is_config_provided) {
    return Status(
        Status::Code::NOT_FOUND,
        "model configuration is not provided for model '" + Name() + "'");
  }

  RETURN_IF_ERROR(ValidateModelConfig(config_, min_compute_capability_));
  RETURN_IF_ERROR(ValidateModelIOConfig(config_));

  // Initialize the input map
  for (const auto& io : config_.input()) {
    input_map_.insert(std::make_pair(io.name(), io));
    if (!io.optional()) {
      ++required_input_count_;
    }
  }

  // Initialize the output map and label provider for each output
  label_provider_ = std::make_shared<LabelProvider>();
  for (const auto& io : config_.output()) {
    output_map_.insert(std::make_pair(io.name(), io));

    if (!io.label_filename().empty()) {
      const auto label_path = JoinPath({model_dir_, io.label_filename()});
      RETURN_IF_ERROR(label_provider_->AddLabels(io.name(), label_path));
    }
  }

  if (config_.has_dynamic_batching()) {
    default_priority_level_ =
        config_.dynamic_batching().default_priority_level();
    max_priority_level_ = config_.dynamic_batching().priority_levels();
  } else if (config_.has_ensemble_scheduling()) {
    // For ensemble, allow any priority level to pass through
    default_priority_level_ = 0;
    max_priority_level_ = UINT32_MAX;
  } else {
    default_priority_level_ = 0;
    max_priority_level_ = 0;
  }

  return Status::Success;
}

}}  // namespace triton::core
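For reference, the GetInput()/GetOutput() methods above follow a simple lookup-and-report pattern: find the name in a map, hand back a pointer into the map on success, or return an error status on a miss. A minimal standalone sketch of that pattern is below; the Status and ModelInput structs here are simplified stand-ins for illustration only, not the Triton types.

#include <iostream>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins so this sketch compiles on its own.
struct Status {
  bool ok;
  std::string msg;
};
struct ModelInput {
  std::string name;
  int dims;
};

// Look the name up in a map; return a pointer to the stored entry on success,
// or an error Status when the name is unknown.
Status GetInput(
    const std::unordered_map<std::string, ModelInput>& input_map,
    const std::string& name, const ModelInput** input)
{
  const auto itr = input_map.find(name);
  if (itr == input_map.end()) {
    return Status{false, "unexpected inference input '" + name + "'"};
  }
  *input = &itr->second;
  return Status{true, ""};
}

int main()
{
  std::unordered_map<std::string, ModelInput> inputs{{"INPUT0", {"INPUT0", 3}}};
  const ModelInput* found = nullptr;
  Status s = GetInput(inputs, "INPUT1", &found);
  std::cout << (s.ok ? "found" : s.msg) << std::endl;
  return 0;
}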
3rdparty/core-r22.12/src/model.h deleted 100644 → 0 View file @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "infer_stats.h"
#include "label_provider.h"
#include "model_config.pb.h"
#include "scheduler.h"
#include "status.h"
namespace triton { namespace core {

class InferenceRequest;

//
// Interface for models that handle inference requests.
//
class Model {
 public:
  explicit Model(
      const double min_compute_capability, const std::string& model_dir,
      const int64_t version, const inference::ModelConfig& config)
      : config_(config), min_compute_capability_(min_compute_capability),
        version_(version), required_input_count_(0), model_dir_(model_dir),
        set_model_config_(false)
  {
  }
  virtual ~Model() {}

  // Get the name of model being served.
  const std::string& Name() const { return config_.name(); }

  // Get the version of model being served.
  int64_t Version() const { return version_; }

  // Get the configuration of model being served.
  const inference::ModelConfig& Config() const { return config_; }

  // Get the number of required inputs
  size_t RequiredInputCount() const { return required_input_count_; }

  // Get the stats collector for the model being served.
  InferenceStatsAggregator* MutableStatsAggregator()
  {
    return &stats_aggregator_;
  }
  const InferenceStatsAggregator& StatsAggregator() const
  {
    return stats_aggregator_;
  }

  // Get the model configuration for a named input.
  Status GetInput(
      const std::string& name, const inference::ModelInput** input) const;

  // Get the model configuration for a named output.
  Status GetOutput(
      const std::string& name, const inference::ModelOutput** output) const;

  // Get a label provider for the model.
  const std::shared_ptr<LabelProvider>& GetLabelProvider() const
  {
    return label_provider_;
  }

  // Initialize the instance for Triton core usage
  Status Init(const bool is_config_provided);

  // Enqueue a request for execution. If Status::Success is returned
  // then the model has taken ownership of the request object and so
  // 'request' will be nullptr. If non-success is returned then the
  // caller still retains ownership of 'request'.
  Status Enqueue(std::unique_ptr<InferenceRequest>& request)
  {
    return scheduler_->Enqueue(request);
  }

  // Return the number of in-flight inferences.
  size_t InflightInferenceCount()
  {
    return scheduler_->InflightInferenceCount();
  }

  // Stop processing future requests unless they are considered as in-flight.
  void Stop() { scheduler_->Stop(); }

  uint32_t DefaultPriorityLevel() const { return default_priority_level_; }

  uint32_t MaxPriorityLevel() const { return max_priority_level_; }

 protected:
  // Set the configuration of the model being served.
  Status SetModelConfig(const inference::ModelConfig& config);

  // Explicitly set the scheduler to use for inference requests to the
  // model. The scheduler can only be set once for a model.
  Status SetScheduler(std::unique_ptr<Scheduler> scheduler);

  // The scheduler to use for this model.
  std::unique_ptr<Scheduler> scheduler_;

  // Configuration of the model.
  inference::ModelConfig config_;

 private:
  // The minimum supported CUDA compute capability.
  const double min_compute_capability_;

  // Version of the model.
  int64_t version_;

  // The stats collector for the model.
  InferenceStatsAggregator stats_aggregator_;

  // Label provider for this model.
  std::shared_ptr<LabelProvider> label_provider_;

  size_t required_input_count_;

  // Map from input name to the model configuration for that input.
  std::unordered_map<std::string, inference::ModelInput> input_map_;

  // Map from output name to the model configuration for that output.
  std::unordered_map<std::string, inference::ModelOutput> output_map_;

  // Path to model
  std::string model_dir_;

  // The default priority level for the model.
  uint32_t default_priority_level_;

  // The largest priority value for the model.
  uint32_t max_priority_level_;

  // Whether or not model config has been set.
  bool set_model_config_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_cuda.cc deleted 100644 → 0 View file @ d592fbea
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_config_cuda.h"
#include <cuda_runtime_api.h>
namespace triton { namespace core {

int
GetCudaStreamPriority(
    inference::ModelOptimizationPolicy::ModelPriority priority)
{
  // Default priority is 0
  int cuda_stream_priority = 0;

  int min, max;
  cudaError_t cuerr = cudaDeviceGetStreamPriorityRange(&min, &max);
  if ((cuerr != cudaErrorNoDevice) && (cuerr != cudaSuccess)) {
    return 0;
  }

  switch (priority) {
    case inference::ModelOptimizationPolicy::PRIORITY_MAX:
      cuda_stream_priority = max;
      break;
    case inference::ModelOptimizationPolicy::PRIORITY_MIN:
      cuda_stream_priority = min;
      break;
    default:
      cuda_stream_priority = 0;
      break;
  }

  return cuda_stream_priority;
}

}}  // namespace triton::core
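A priority value like the one returned by GetCudaStreamPriority() is normally passed to cudaStreamCreateWithPriority(). The following is a minimal standalone sketch of that use, assuming a CUDA toolchain and device are available; it is an illustration, not part of the Triton sources.

// Build with: nvcc -o stream_priority stream_priority.cu
#include <cuda_runtime_api.h>
#include <cstdio>

int main()
{
  // Query the priority range the device supports. CUDA reports the least and
  // greatest priorities; numerically lower values mean higher priority.
  int least = 0, greatest = 0;
  cudaError_t err = cudaDeviceGetStreamPriorityRange(&least, &greatest);
  if (err != cudaSuccess) {
    std::printf("no usable CUDA device: %s\n", cudaGetErrorString(err));
    return 0;
  }

  // Create a stream at the highest supported priority, i.e. the value
  // GetCudaStreamPriority() would return for PRIORITY_MAX.
  cudaStream_t stream;
  err = cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, greatest);
  if (err == cudaSuccess) {
    std::printf(
        "created stream with priority %d (range %d..%d)\n", greatest, least,
        greatest);
    cudaStreamDestroy(stream);
  }
  return 0;
}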
3rdparty/core-r22.12/src/model_config_cuda.h deleted 100644 → 0 View file @ d592fbea
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stdint.h>
#include "model_config.pb.h"
namespace triton { namespace core {

/// Get the CUDA stream priority for a given ModelPriority.
/// \param priority The inference::ModelOptimizationPolicy::ModelPriority
/// priority.
/// \return The CUDA stream priority.
int GetCudaStreamPriority(
    inference::ModelOptimizationPolicy::ModelPriority priority);

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_utils.cc deleted 100644 → 0 View file @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_config_utils.h"
#include <google/protobuf/util/json_util.h>
#include <deque>
#include <mutex>
#include <set>
#include "constants.h"
#include "cuda_utils.h"
#include "filesystem.h"
#include "triton/common/logging.h"
#define TRITONJSON_STATUSTYPE triton::core::Status
#define TRITONJSON_STATUSRETURN(M) \
return triton::core::Status(triton::core::Status::Code::INTERNAL, (M))
#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success
#include "triton/common/triton_json.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace core {

namespace {

#ifdef TRITON_ENABLE_ENSEMBLE

struct EnsembleTensor {
  EnsembleTensor(bool isOutput) : ready(false), isOutput(isOutput) {}
  bool ready;
  bool isOutput;
  std::vector<EnsembleTensor*> prev_nodes;
  std::vector<EnsembleTensor*> next_nodes;
};
/// Build a graph that represents the data flow in the ensemble specified in
/// given model config. the node (ensemble tensor) in the graph can be looked
/// up using its name as key.
/// \param ensemble_config The model configuration that specifies
/// ensemble_scheduling field.
/// \param keyed_ensemble_graph Returned the ensemble graph.
/// \return The error status. A non-OK status indicates the build fails because
/// the ensemble configuration is not valid.
Status
BuildEnsembleGraph(
    const inference::ModelConfig& config,
    std::unordered_map<std::string, EnsembleTensor>& keyed_ensemble_graph)
{
  keyed_ensemble_graph.clear();
  size_t step_idx = 0;
  for (const auto& element : config.ensemble_scheduling().step()) {
    if (element.model_name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'model_name' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }
    if (element.input_map().size() == 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'input_map' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }
    if (element.output_map().size() == 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'output_map' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }

    // Link ensemble tensors
    std::vector<EnsembleTensor*> tensor_as_output;
    for (const auto& output_map : element.output_map()) {
      auto it = keyed_ensemble_graph.find(output_map.second);
      if (it != keyed_ensemble_graph.end()) {
        if (it->second.isOutput) {
          return Status(
              Status::Code::INVALID_ARG,
              "ensemble tensor '" + it->first +
                  "' can appear in an output map only once for ensemble '" +
                  config.name() + "' step " + std::to_string(step_idx));
        } else {
          it->second.isOutput = true;
        }
      } else {
        it = keyed_ensemble_graph
                 .emplace(
                     std::make_pair(output_map.second, EnsembleTensor(true)))
                 .first;
      }
      tensor_as_output.push_back(&(it->second));
    }

    std::set<std::string> model_inputs;
    for (const auto& input_map : element.input_map()) {
      if (model_inputs.find(input_map.first) != model_inputs.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "input '" + input_map.first + "' in model '" +
                element.model_name() +
                "' is mapped to multiple ensemble tensors for ensemble '" +
                config.name() + "' step " + std::to_string(step_idx));
      } else {
        model_inputs.emplace(input_map.first);
      }
      auto it = keyed_ensemble_graph.find(input_map.second);
      if (it == keyed_ensemble_graph.end()) {
        it = keyed_ensemble_graph
                 .emplace(
                     std::make_pair(input_map.second, EnsembleTensor(false)))
                 .first;
      }
      for (auto output : tensor_as_output) {
        output->prev_nodes.push_back(&(it->second));
        it->second.next_nodes.push_back(output);
      }
    }

    step_idx++;
  }

  return Status::Success;
}
Status
ValidateEnsembleSchedulingConfig(const inference::ModelConfig& config)
{
  if (config.platform() != kEnsemblePlatform) {
    return Status(
        Status::Code::INVALID_ARG,
        "ensemble scheduling cannot be set for model '" + config.name() +
            "' whose platform is not " + kEnsemblePlatform);
  }
  if (config.instance_group().size() != 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "instance group should not be specified for ensemble '" +
            config.name() + "'");
  }
  if (config.has_optimization()) {
    return Status(
        Status::Code::INVALID_ARG,
        "optimization should not be specified for ensemble '" + config.name() +
            "'");
  }
  if (config.model_warmup_size() != 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "model_warmup can not be specified for ensemble '" + config.name() +
            "'");
  }

  // Make sure step is not empty and all fields are set
  if (config.ensemble_scheduling().step_size() == 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify 'step' for ensemble '" + config.name() + "'");
  }

  std::unordered_map<std::string, EnsembleTensor> tensors;
  RETURN_IF_ERROR(BuildEnsembleGraph(config, tensors));

  // check data flow
  std::deque<EnsembleTensor*> ready_queue;
  for (const auto& input : config.input()) {
    auto it = tensors.find(input.name());
    if (it == tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble input '" + input.name() + "' for ensemble " +
              config.name() + "' is not used");
    }
    it->second.ready = true;
    ready_queue.push_back(&(it->second));
  }
  while (!ready_queue.empty()) {
    auto& ready_node = ready_queue.front();
    for (auto& next_node : ready_node->next_nodes) {
      if (next_node->ready) {
        continue;
      }
      bool next_node_ready = true;
      for (auto& prev_node : next_node->prev_nodes) {
        if (!prev_node->ready) {
          next_node_ready = false;
          break;
        }
      }
      next_node->ready = next_node_ready;
      if (next_node_ready) {
        ready_queue.push_back(next_node);
      }
    }
    ready_queue.pop_front();
  }
  std::set<std::string> outputs;
  for (const auto& output : config.output()) {
    auto it = tensors.find(output.name());
    if (it == tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble output '" + output.name() + "' for ensemble " +
              config.name() + "' is not used");
    }
    if (!it->second.ready) {
      return Status(
          Status::Code::INVALID_ARG,
          "output '" + output.name() + "' for ensemble '" + config.name() +
              "' is not written");
    } else {
      outputs.insert(it->first);
    }
  }
  // Check redundant ensemble tensors
  for (const auto& tensor : tensors) {
    // skip ensemble outputs as they have been checked and can have no
    // next nodes
    if (outputs.find(tensor.first) != outputs.end()) {
      continue;
    }
    if (!tensor.second.ready || (tensor.second.next_nodes.size() == 0)) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble tensor '" + tensor.first + "' is unused in ensemble '" +
              config.name() + "'");
    }
  }
  return Status::Success;
}
#endif // TRITON_ENABLE_ENSEMBLE
template <class ModelIO>
Status
ValidateIOShape(
    const ModelIO& io, int32_t max_batch_size,
    const std::string& message_prefix = "")
{
  if (io.name().empty()) {
    return Status(
        Status::Code::INVALID_ARG, message_prefix + "must specify 'name'");
  }

  if (io.data_type() == inference::DataType::TYPE_INVALID) {
    return Status(
        Status::Code::INVALID_ARG, "model output must specify 'data_type'");
  }

  if (io.dims_size() == 0) {
    return Status(
        Status::Code::INVALID_ARG, message_prefix + "must specify 'dims'");
  }

  // If the configuration is non-batching, then no input or output
  // reshape can be empty as that would mean that input or output was
  // always empty (no data).
  if (io.has_reshape() && (io.reshape().shape_size() == 0) &&
      (max_batch_size == 0)) {
    return Status(
        Status::Code::INVALID_ARG,
        message_prefix +
            "cannot have empty reshape for non-batching model as scalar "
            "tensors are not supported");
  }

  for (auto dim : io.dims()) {
    // Dimension cannot be 0.
    if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
      return Status(
          Status::Code::INVALID_ARG,
          message_prefix + "dimension must be integer >= 1, or " +
              std::to_string(triton::common::WILDCARD_DIM) +
              " to indicate a variable-size dimension");
    }
  }

  if (io.has_reshape()) {
    // Zeros are not allowed in reshape.
    for (auto dim : io.reshape().shape()) {
      if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
        return Status(
            Status::Code::INVALID_ARG,
            message_prefix + "reshape dimensions must be integer >= 1, or " +
                std::to_string(triton::common::WILDCARD_DIM) +
                " to indicate a variable-size dimension");
      }
    }

    const int64_t dims_size = triton::common::GetElementCount(io.dims());
    const int64_t reshape_size =
        triton::common::GetElementCount(io.reshape().shape());

    // dims and reshape must both have same element count
    // or both have variable-size dimension.
    // Special case for empty reshape... expect dims to have element
    // count of 1.
    if ((dims_size != reshape_size) &&
        ((reshape_size != 0) || (dims_size != 1))) {
      return Status(
          Status::Code::INVALID_ARG,
          message_prefix + "has different size for dims and reshape");
    }

    // shape contains variable-size dimension, in this case we compare if
    // each pair of the chunks separated by variable-size dimension has
    // the same element count. For instance, from [2, 4, -1, 6] to
    // [8, -1, 1, 6] is a valid reshape as 2 * 4 = 8 and 6 = 1 * 6.
    if (dims_size == -1) {
      std::vector<int64_t> dim_element_cnts;
      std::vector<int64_t> reshape_element_cnts;
      int64_t current_cnt = 1;
      for (const auto& dim : io.dims()) {
        if (dim != -1) {
          current_cnt *= dim;
        } else {
          dim_element_cnts.push_back(current_cnt);
          current_cnt = 1;
        }
      }
      dim_element_cnts.push_back(current_cnt);

      current_cnt = 1;
      for (const auto& dim : io.reshape().shape()) {
        if (dim != -1) {
          current_cnt *= dim;
        } else {
          reshape_element_cnts.push_back(current_cnt);
          current_cnt = 1;
        }
      }
      reshape_element_cnts.push_back(current_cnt);

      if (dim_element_cnts.size() != reshape_element_cnts.size()) {
        return Status(
            Status::Code::INVALID_ARG,
            message_prefix +
                "has different number of variable-size dimensions for dims "
                "and reshape");
      }
      for (size_t idx = 0; idx < dim_element_cnts.size(); idx++) {
        if (dim_element_cnts[idx] != reshape_element_cnts[idx]) {
          return Status(
              Status::Code::INVALID_ARG,
              message_prefix + "has different size for dims and reshape");
        }
      }
    }
  }

  return Status::Success;
}

}  // namespace
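The variable-size reshape check above compares the product of the fixed dimensions in each chunk between wildcards. Below is a standalone illustration of that chunk-product comparison; the helper name ChunkProducts is ours, introduced only for this sketch.

#include <cstdint>
#include <iostream>
#include <vector>

// Split a shape at each -1 (variable-size) dimension and record the product
// of the fixed dimensions in every chunk, mirroring the loop in
// ValidateIOShape above.
std::vector<int64_t> ChunkProducts(const std::vector<int64_t>& dims)
{
  std::vector<int64_t> cnts;
  int64_t current = 1;
  for (int64_t d : dims) {
    if (d != -1) {
      current *= d;
    } else {
      cnts.push_back(current);
      current = 1;
    }
  }
  cnts.push_back(current);
  return cnts;
}

int main()
{
  // From the comment above: [2, 4, -1, 6] -> [8, -1, 1, 6] is a valid reshape
  // because 2 * 4 == 8 and 6 == 1 * 6.
  const std::vector<int64_t> dims{2, 4, -1, 6};
  const std::vector<int64_t> reshape{8, -1, 1, 6};
  std::cout << (ChunkProducts(dims) == ChunkProducts(reshape)
                    ? "compatible reshape"
                    : "incompatible reshape")
            << std::endl;
  return 0;
}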
Status
GetModelVersionFromPath(const std::string& path, int64_t* version)
{
  auto version_dir = BaseName(path);

  // Determine the version from the last segment of 'path'
  try {
    *version = std::atoll(version_dir.c_str());
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        "unable to determine model version from " + path);
  }

  return Status::Success;
}
Status
GetBooleanSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype, float* fp32_false_value,
    float* fp32_true_value, int32_t* int32_false_value,
    int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  for (const auto& control_input : batcher.control_input()) {
    if (control_input.name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must have a name for " +
              model_name);
    }

    if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor '" + control_input.name() +
              "' is specified for multiple control kinds for " + model_name);
    }

    seen_tensors.insert(control_input.name());

    for (const auto& c : control_input.control()) {
      if (c.kind() == control_kind) {
        if (seen_control) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies multiple " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " tensors for " + model_name);
        }

        *tensor_name = control_input.name();
        seen_control = true;

        // Make sure only one of int, float, or bool type is specified.
        if (!((c.int32_false_true_size() != 0) ||
              (c.fp32_false_true_size() != 0) ||
              (c.bool_false_true_size() != 0))) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching must specify either 'int32_false_true', "
              "'fp32_false_true' or 'bool_false_true' for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        } else if (
            ((c.int32_false_true_size() != 0) &&
             (c.fp32_false_true_size() != 0)) ||
            ((c.int32_false_true_size() != 0) &&
             (c.bool_false_true_size() != 0)) ||
            ((c.fp32_false_true_size() != 0) &&
             (c.bool_false_true_size() != 0))) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies more than one from "
              "'int32_false_true', 'fp32_false_true' and 'bool_false_true' "
              "for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        }

        if (c.int32_false_true_size() > 0) {
          if (c.int32_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'int32_false_true' must have "
                "exactly 2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_INT32;
          }
          if (int32_false_value != nullptr) {
            *int32_false_value = c.int32_false_true(0);
          }
          if (int32_true_value != nullptr) {
            *int32_true_value = c.int32_false_true(1);
          }
        } else if (c.fp32_false_true_size() > 0) {
          if (c.fp32_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'fp32_false_true' must have "
                "exactly 2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_FP32;
          }
          if (fp32_false_value != nullptr) {
            *fp32_false_value = c.fp32_false_true(0);
          }
          if (fp32_true_value != nullptr) {
            *fp32_true_value = c.fp32_false_true(1);
          }
        } else {
          if (c.bool_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'bool_false_true' must have "
                "exactly 2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_BOOL;
          }
          if (bool_false_value != nullptr) {
            *bool_false_value = c.bool_false_true(0);
          }
          if (bool_true_value != nullptr) {
            *bool_true_value = c.bool_false_true(1);
          }
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must specify a " +
              inference::ModelSequenceBatching_Control_Kind_Name(
                  control_kind) +
              " value for " + model_name);
    }

    tensor_name->clear();
  }

  return Status::Success;
}
Status
GetTypedSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  for (const auto& control_input : batcher.control_input()) {
    if (control_input.name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must have a name for " +
              model_name);
    }

    if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor '" + control_input.name() +
              "' is specified for multiple control kinds for " + model_name);
    }

    seen_tensors.insert(control_input.name());

    for (const auto& c : control_input.control()) {
      if (c.kind() == control_kind) {
        if (seen_control) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies multiple " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " tensors for " + model_name);
        }

        *tensor_name = control_input.name();
        if (tensor_datatype != nullptr) {
          *tensor_datatype = c.data_type();
        }

        seen_control = true;

        if ((c.int32_false_true_size() > 0) ||
            (c.fp32_false_true_size() > 0) ||
            (c.bool_false_true_size() > 0)) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching must not specify either "
              "'int32_false_true', 'fp32_false_true' or 'bool_false_true' "
              "for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must specify a " +
              inference::ModelSequenceBatching_Control_Kind_Name(
                  control_kind) +
              " value for " + model_name);
    }

    tensor_name->clear();
  }

  return Status::Success;
}
Status
GetNormalizedModelConfig(
    const std::string& model_name, const std::string& path,
    const double min_compute_capability, inference::ModelConfig* config)
{
  // Server-side autofill only sets certain backend fields for the models that
  // belong to limited backends for backwards-compatibility. See TensorRT
  // backend, ONNX Runtime backend, OpenVINO backend, TensorFlow backend, and
  // PyTorch backend. Extracting detailed information is delegated to the
  // backend implementation to auto-complete.
  RETURN_IF_ERROR(
      AutoCompleteBackendFields(model_name, std::string(path), config));
  LOG_VERBOSE(1) << "Server side auto-completed config: "
                 << config->DebugString();

  RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability, config));

  return Status::Success;
}
Status
NormalizeModelConfig(
    const double min_compute_capability, inference::ModelConfig* config)
{
  // If version_policy is not specified, default to Latest 1 version.
  if (!config->has_version_policy()) {
    inference::ModelVersionPolicy::Latest latest;
    latest.set_num_versions(1);
    config->mutable_version_policy()->mutable_latest()->CopyFrom(latest);
  }

  // If dynamic batching is specified...
  if (config->has_dynamic_batching()) {
    // If preferred batch size is not specified set it to
    // max-batch-size.
    if (config->dynamic_batching().preferred_batch_size().size() == 0) {
      auto mutable_preferred_batch_size =
          config->mutable_dynamic_batching()->mutable_preferred_batch_size();
      if (config->max_batch_size() > 0) {
        mutable_preferred_batch_size->Add(config->max_batch_size());
      }
    }
  }

  // If sequence batching is specified...
  if (config->has_sequence_batching()) {
    // Set the default max sequence idle time if not specified.
    if (config->sequence_batching().max_sequence_idle_microseconds() == 0) {
      config->mutable_sequence_batching()->set_max_sequence_idle_microseconds(
          SEQUENCE_IDLE_DEFAULT_MICROSECONDS);
    }

    if (config->sequence_batching().has_oldest()) {
      // If preferred batch size is not specified set it to
      // max-batch-size.
      if (config->sequence_batching().oldest().preferred_batch_size().size() ==
          0) {
        auto mutable_preferred_batch_size =
            config->mutable_sequence_batching()
                ->mutable_oldest()
                ->mutable_preferred_batch_size();
        if (config->max_batch_size() > 0) {
          mutable_preferred_batch_size->Add(config->max_batch_size());
        }
      }
    }
  }

  // If model ensembling is specified, don't attempt to normalize
  // instance_group as it is not allowed in ensemble scheduling.
  if (!config->has_ensemble_scheduling()) {
    auto optimization = config->mutable_optimization();
    if (!optimization->has_input_pinned_memory()) {
      optimization->mutable_input_pinned_memory()->set_enable(true);
    }
    if (!optimization->has_output_pinned_memory()) {
      optimization->mutable_output_pinned_memory()->set_enable(true);
    }
  }

  return Status::Success;
}
Status
NormalizeInstanceGroup(
    const double min_compute_capability,
    const std::vector<inference::ModelInstanceGroup>& preferred_groups,
    inference::ModelConfig* config)
{
  // Instance group setting doesn't apply to ensemble
  if (config->has_ensemble_scheduling()) {
    return Status::Success;
  }

  // Creates a set of supported GPU device ids
  std::set<int> supported_gpus;
#ifdef TRITON_ENABLE_GPU
  // Get the total number of GPUs from the runtime library.
  Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
  if (!status.IsOk()) {
    return status;
  }
#endif  // TRITON_ENABLE_GPU

  // Make sure there is at least one instance_group.
  if (config->instance_group().empty()) {
    inference::ModelInstanceGroup* group = config->add_instance_group();
    group->set_name(config->name());

    for (const auto& pg : preferred_groups) {
      group->set_kind(pg.kind());
      group->set_count(pg.count());
      // handle preferred GPU setting differently based on kind
      if (pg.kind() == inference::ModelInstanceGroup::KIND_GPU) {
        // Don't use preferred group with KIND_GPU if there is no GPU.
        if (supported_gpus.empty()) {
          continue;
        }
        // If preferred group sets GPUs, limit deployment onto those that
        // are also listed in supported gpus
        if (!pg.gpus().empty()) {
          for (const int32_t gid : pg.gpus()) {
            if (supported_gpus.find(gid) != supported_gpus.end()) {
              group->add_gpus(gid);
            }
          }
        }
        break;
      } else if (pg.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
        // if AUTO, then set preferred GPU as is, to align with KIND_AUTO
        // deduction specified below
        for (const int32_t gid : pg.gpus()) {
          group->add_gpus(gid);
        }
        break;
      }
      // Other kind should not set GPUs
      break;
    }
  }

  // Assign default name, kind and count to each instance group that
  // doesn't give those values explicitly. For KIND_GPU, set GPUs to
  // all available if not specified explicitly.
  size_t cnt = 0;
  for (auto& group : *config->mutable_instance_group()) {
    // Name
    if (group.name().empty()) {
      group.set_name(config->name() + "_" + std::to_string(cnt));
    }
    cnt++;

    // For KIND_AUTO... if there are no GPUs or if any of the listed
    // 'gpu's are not present, then use KIND_CPU.
    if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
      if (supported_gpus.empty()) {
        group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
      } else {
        for (const int32_t gid : group.gpus()) {
          if (supported_gpus.find(gid) == supported_gpus.end()) {
            group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
            break;
          }
        }
      }

      if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
        group.set_kind(inference::ModelInstanceGroup::KIND_GPU);
      }
    }

    // KIND is resolved at this point
    for (const auto& pg : preferred_groups) {
      if (group.kind() != pg.kind()) {
        continue;
      }
      // Limit the GPU setting within what is specified in the preferred
      // group, if no available GPU then skip to next preferred group
      if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
          group.gpus().empty() && !pg.gpus().empty()) {
        for (const int32_t gid : pg.gpus()) {
          if (supported_gpus.find(gid) != supported_gpus.end()) {
            group.add_gpus(gid);
          }
        }
        if (group.gpus().empty()) {
          continue;
        }
      }
      if ((group.count() < 1) && (pg.count() > 0)) {
        group.set_count(pg.count());
      }
    }

    // Set Triton default if the fields are not set from preferred group
    // Count
    if (group.count() < 1) {
      RETURN_IF_ERROR(SetDefaultInstanceCount(&group, config->backend()));
    }

    // GPUs
    if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
        (group.gpus().size() == 0)) {
      for (auto d : supported_gpus) {
        group.add_gpus(d);
      }
    }
  }

  return Status::Success;
}
Status
LocalizePythonBackendExecutionEnvironmentPath(
    const std::string& model_path, inference::ModelConfig* config,
    std::shared_ptr<LocalizedPath>* localized_model_dir)
{
  if (config->backend() == "python") {
    if (config->parameters().contains("EXECUTION_ENV_PATH")) {
      // Read EXECUTION_ENV_PATH
      std::string exec_env_path =
          config->parameters().at("EXECUTION_ENV_PATH").string_value();
      // Replace model directory variable with model_path
      std::string model_dir_var = "$$TRITON_MODEL_DIRECTORY";
      if (exec_env_path.substr(0, model_dir_var.size()) == model_dir_var) {
        exec_env_path.replace(0, model_dir_var.size(), model_path);
      }
      // Collapse any .. in the path
      std::string abs_exec_env_path;
      std::size_t prev_pos = exec_env_path.size();
      std::size_t pos = exec_env_path.find_last_of('/', prev_pos - 1);
      int skip = 0;
      while (pos != std::string::npos && prev_pos > 0) {
        if (!skip) {
          abs_exec_env_path =
              exec_env_path.substr(pos, prev_pos - pos) + abs_exec_env_path;
        }
        skip = skip > 0 ? skip - 1 : skip;
        if (pos >= 3 && exec_env_path.substr(pos - 3, 3) == "/..") {
          skip += 2;
        }
        prev_pos = pos;
        pos = exec_env_path.find_last_of('/', prev_pos - 1);
      }
      abs_exec_env_path = exec_env_path.substr(0, prev_pos) + abs_exec_env_path;

      // Localize iff abs_exec_env_path is outside the model directory
      std::string model_path_slash =
          model_path.back() == '/' ? model_path : model_path + "/";
      if (abs_exec_env_path.substr(0, model_path_slash.size()) !=
          model_path_slash) {
        // Localize the file
        std::shared_ptr<LocalizedPath> localized_exec_env_path;
        RETURN_IF_ERROR(
            LocalizePath(abs_exec_env_path, &localized_exec_env_path));
        // Persist the localized temporary path
        (*localized_model_dir)
            ->other_localized_path.push_back(localized_exec_env_path);
        // Rewrite EXECUTION_ENV_PATH
        config->mutable_parameters()
            ->at("EXECUTION_ENV_PATH")
            .set_string_value(localized_exec_env_path->Path());
      }
    }
  }
  return Status::Success;
}
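To make the intent of the ".." collapsing above concrete, here is a simplified standalone sketch that produces the same kind of result by splitting on '/' and popping a segment for every "..", rather than scanning backwards the way the function above does; it is an illustration only, not the Triton implementation.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Collapse ".." segments: "/models/add_sub/1/../env.tar.gz" becomes
// "/models/add_sub/env.tar.gz".
std::string CollapseDotDot(const std::string& path)
{
  std::vector<std::string> parts;
  std::stringstream ss(path);
  std::string seg;
  while (std::getline(ss, seg, '/')) {
    if (seg == "..") {
      if (!parts.empty()) {
        parts.pop_back();  // ".." cancels the previous segment
      }
    } else if (!seg.empty()) {
      parts.push_back(seg);
    }
  }
  std::string out;
  for (const auto& p : parts) {
    out += "/" + p;
  }
  return out.empty() ? "/" : out;
}

int main()
{
  std::cout << CollapseDotDot("/models/add_sub/1/../env.tar.gz") << std::endl;
  return 0;
}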
Status
SetDefaultInstanceCount(
    inference::ModelInstanceGroup* group, const std::string& backend)
{
  group->set_count(1);
  // Backends opt into the default_cpu_instance_count since
  // some backends (pytorch, OpenVINO) don't perform well/have high overhead
  // when using multiple instances.
  const int default_cpu_instance_count = 2;
  bool use_default_cpu_instance_count =
      (backend == kTensorFlowBackend) || (backend == kOnnxRuntimeBackend);
  if (group->kind() == inference::ModelInstanceGroup::KIND_CPU &&
      use_default_cpu_instance_count) {
    group->set_count(default_cpu_instance_count);
  }
  return Status::Success;
}
Status
AutoCompleteBackendFields(
    const std::string& model_name, const std::string& model_path,
    inference::ModelConfig* config)
{
  std::set<std::string> version_dirs;
  RETURN_IF_ERROR(GetDirectorySubdirs(model_path, &version_dirs));

  // There must be at least one version directory that we can inspect to
  // attempt to determine the platform. If not, we skip autofill with file
  // name. For now we allow multiple versions and only inspect the first
  // version directory to ensure it is valid. We can add more aggressive
  // checks later.
  const bool has_version = (version_dirs.size() != 0);
  const auto version_path =
      has_version ? JoinPath({model_path, *(version_dirs.begin())}) : "";
  std::set<std::string> version_dir_content;
  if (has_version) {
    RETURN_IF_ERROR(GetDirectoryContents(version_path, &version_dir_content));
  }

  // If the model name is not given in the configuration, set it based
  // on the model path.
  if (config->name().empty()) {
    config->set_name(model_name);
  }

  // Trying to fill the 'backend', 'default_model_filename' field.

  // TensorFlow
  // For TF backend, the platform is required
  if (config->platform().empty()) {
    // Check 'backend', 'default_model_filename', and the actual directory
    // to determine the platform
    if (config->backend().empty() ||
        (config->backend() == kTensorFlowBackend)) {
      if (config->default_model_filename() == kTensorFlowSavedModelFilename) {
        config->set_platform(kTensorFlowSavedModelPlatform);
      } else if (
          config->default_model_filename() == kTensorFlowGraphDefFilename) {
        config->set_platform(kTensorFlowGraphDefPlatform);
      } else if (config->default_model_filename().empty() && has_version) {
        bool is_dir = false;
        if (version_dir_content.find(kTensorFlowSavedModelFilename) !=
            version_dir_content.end()) {
          RETURN_IF_ERROR(IsDirectory(
              JoinPath({version_path, kTensorFlowSavedModelFilename}),
              &is_dir));
          if (is_dir) {
            config->set_platform(kTensorFlowSavedModelPlatform);
          }
        }
        if (version_dir_content.find(kTensorFlowGraphDefFilename) !=
            version_dir_content.end()) {
          RETURN_IF_ERROR(IsDirectory(
              JoinPath({version_path, kTensorFlowGraphDefFilename}), &is_dir));
          if (!is_dir) {
            config->set_platform(kTensorFlowGraphDefPlatform);
          }
        }
      }
    }
  }
  // Fill 'backend' and 'default_model_filename' if missing
  if ((config->platform() == kTensorFlowSavedModelPlatform) ||
      (config->platform() == kTensorFlowGraphDefPlatform)) {
    if (config->backend().empty()) {
      config->set_backend(kTensorFlowBackend);
    }
    if (config->default_model_filename().empty()) {
      if (config->platform() == kTensorFlowSavedModelPlatform) {
        config->set_default_model_filename(kTensorFlowSavedModelFilename);
      } else {
        config->set_default_model_filename(kTensorFlowGraphDefFilename);
      }
    }
    return Status::Success;
  }

  // TensorRT
  if (config->backend().empty()) {
    if ((config->platform() == kTensorRTPlanPlatform) ||
        (config->default_model_filename() == kTensorRTPlanFilename)) {
      config->set_backend(kTensorRTBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      bool is_dir = false;
      if (version_dir_content.find(kTensorRTPlanFilename) !=
          version_dir_content.end()) {
        RETURN_IF_ERROR(IsDirectory(
            JoinPath({version_path, kTensorRTPlanFilename}), &is_dir));
        if (!is_dir) {
          config->set_backend(kTensorRTBackend);
        }
      }
    }
  }
  if (config->backend() == kTensorRTBackend) {
    if (config->platform().empty()) {
      config->set_platform(kTensorRTPlanPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kTensorRTPlanFilename);
    }
    return Status::Success;
  }

  // ONNXRuntime
  if (config->backend().empty()) {
    if ((config->platform() == kOnnxRuntimeOnnxPlatform) ||
        (config->default_model_filename() == kOnnxRuntimeOnnxFilename)) {
      config->set_backend(kOnnxRuntimeBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kOnnxRuntimeOnnxFilename) !=
          version_dir_content.end()) {
        // ONNX model can be a file or a directory in the case of large model
        config->set_backend(kOnnxRuntimeBackend);
      }
    }
  }
  if (config->backend() == kOnnxRuntimeBackend) {
    if (config->platform().empty()) {
      config->set_platform(kOnnxRuntimeOnnxPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kOnnxRuntimeOnnxFilename);
    }
    return Status::Success;
  }

  // OpenVINO
  if (config->backend().empty()) {
    if (config->default_model_filename() == kOpenVINORuntimeOpenVINOFilename) {
      config->set_backend(kOpenVINORuntimeBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kOpenVINORuntimeOpenVINOFilename) !=
          version_dir_content.end()) {
        config->set_backend(kOpenVINORuntimeBackend);
      }
    }
  }
  if (config->backend() == kOpenVINORuntimeBackend) {
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kOpenVINORuntimeOpenVINOFilename);
    }
    return Status::Success;
  }

  // PyTorch (TorchScript, LibTorch)
  if (config->backend().empty()) {
    if ((config->platform() == kPyTorchLibTorchPlatform) ||
        (config->default_model_filename() == kPyTorchLibTorchFilename)) {
      config->set_backend(kPyTorchBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      bool is_dir = false;
      if (version_dir_content.find(kPyTorchLibTorchFilename) !=
          version_dir_content.end()) {
        RETURN_IF_ERROR(IsDirectory(
            JoinPath({version_path, kPyTorchLibTorchFilename}), &is_dir));
        if (!is_dir) {
          config->set_backend(kPyTorchBackend);
        }
      }
    }
  }
  if (config->backend() == kPyTorchBackend) {
    if (config->platform().empty()) {
      config->set_platform(kPyTorchLibTorchPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kPyTorchLibTorchFilename);
    }
    return Status::Success;
  }

  // Python
  if (config->backend().empty()) {
    if (config->default_model_filename() == kPythonFilename) {
      config->set_backend(kPythonBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kPythonFilename) !=
          version_dir_content.end()) {
        config->set_backend(kPythonBackend);
      }
    }
  }
  if (config->backend() == kPythonBackend) {
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kPythonFilename);
    }
    return Status::Success;
  }

  // Custom Backend
  // For now, only do the narrowest case, where no info is given in the
  // config.
  if (config->backend().empty() && config->platform().empty() &&
      config->default_model_filename().empty()) {
    LOG_VERBOSE(1) << "Could not infer supported backend, so attempting "
                      "autofill of custom backend.";
    // Since we lazily load the backends, we let the model tell us what
    // backend to load. We must assume that if the model name conforms to the
    // required shape, we parse the backend name out of the model file name.
    // i.e. model.identity will set the backend to "identity".
    const std::string delimiter = ".";
    size_t pos = model_name.find(delimiter, 0);
    if (pos == std::string::npos) {
      return Status(
          triton::common::Error::Code::INVALID_ARG,
          ("Invalid model name: Could not determine backend for model '" +
           model_name +
           "' with no backend in model configuration. Expected model name of "
           "the form 'model.<backend_name>'."));
    }
    const std::string backend_name =
        model_name.substr(pos + 1, std::string::npos);
    config->set_backend(backend_name);
    config->set_default_model_filename(
        (std::string("model.") + backend_name).c_str());
    return Status::Success;
  }

  return Status::Success;
}
Status
ValidateModelIOConfig(const inference::ModelConfig& config)
{
  Status status;
  for (const auto& io : config.input()) {
    status =
        ValidateModelInput(io, config.max_batch_size(), config.platform());
    if (!status.IsOk()) {
      return Status(
          status.StatusCode(), status.Message() + " for " + config.name());
    }
  }
  for (const auto& io : config.output()) {
    status =
        ValidateModelOutput(io, config.max_batch_size(), config.platform());
    if (!status.IsOk()) {
      return Status(
          status.StatusCode(), status.Message() + " for " + config.name());
    }
  }
  status = ValidateBatchIO(config);
  if (!status.IsOk()) {
    return Status(
        status.StatusCode(), status.Message() + " for " + config.name());
  }
  return Status::Success;
}
Status
ValidateBatchIO(const inference::ModelConfig& config)
{
  std::set<std::string> input_names;
  std::set<std::string> output_names;
  for (const auto& io : config.input()) {
    input_names.emplace(io.name());
  }
  for (const auto& io : config.output()) {
    output_names.emplace(io.name());
  }
  for (const auto& batch_io : config.batch_input()) {
    switch (batch_io.kind()) {
      case inference::BatchInput::BATCH_ELEMENT_COUNT:
      case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT:
      case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO:
      case inference::BatchInput::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE:
      case inference::BatchInput::BATCH_ITEM_SHAPE:
      case inference::BatchInput::BATCH_ITEM_SHAPE_FLATTEN: {
        if (batch_io.source_input_size() != 1) {
          return Status(
              Status::Code::INVALID_ARG,
              "batch input kind '" +
                  inference::BatchInput::Kind_Name(batch_io.kind()) +
                  "' expects 1 source input, got " +
                  std::to_string(batch_io.source_input_size()));
        }
        break;
      }
      default:
        return Status(
            Status::Code::INVALID_ARG,
            "unknown batch input kind '" +
                inference::BatchInput::Kind_Name(batch_io.kind()) + "'");
    }
    if ((batch_io.data_type() != inference::DataType::TYPE_INT32) &&
        (batch_io.data_type() != inference::DataType::TYPE_FP32)) {
      return Status(
          Status::Code::INVALID_ARG,
          "batch input data type must be TYPE_INT32 or TYPE_FP32");
    }
    for (const auto& source_name : batch_io.source_input()) {
      if (input_names.find(source_name) == input_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown source input name '" + source_name + "'");
      }
    }
  }

  for (const auto& batch_io : config.batch_output()) {
    switch (batch_io.kind()) {
      case inference::BatchOutput::BATCH_SCATTER_WITH_INPUT_SHAPE: {
        if (batch_io.source_input_size() != 1) {
          return Status(
              Status::Code::INVALID_ARG,
              "batch output kind '" +
                  inference::BatchOutput::Kind_Name(batch_io.kind()) +
                  "' expects 1 source input, got " +
                  std::to_string(batch_io.source_input_size()));
        }
        break;
      }
      default:
        return Status(
            Status::Code::INVALID_ARG,
            "unknown batch output kind '" +
                inference::BatchOutput::Kind_Name(batch_io.kind()) + "'");
    }
    for (const auto& source_name : batch_io.source_input()) {
      if (input_names.find(source_name) == input_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown source input name '" + source_name + "'");
      }
    }
    std::set<std::string> target_names;
    for (const auto& target_name : batch_io.target_name()) {
      if (output_names.find(target_name) == output_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown target output name '" + target_name + "'");
      }
      if (target_names.emplace(target_name).second == false) {
        return Status(
            Status::Code::INVALID_ARG,
            "target output name '" + target_name +
                "' can only be specified once");
      }
    }
  }
  return Status::Success;
}
Status
ValidateModelConfig
(
const
inference
::
ModelConfig
&
config
,
const
double
min_compute_capability
)
{
if
(
config
.
name
().
empty
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"model configuration must specify 'name'"
);
}
if
(
config
.
backend
().
empty
())
{
// Expect backend is not empty unless it is ensemble platform.
#ifdef TRITON_ENABLE_ENSEMBLE
if
(
config
.
platform
()
!=
kEnsemblePlatform
)
#endif // TRITON_ENABLE_ENSEMBLE
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected platform type '"
+
config
.
platform
()
+
"' for "
+
config
.
name
());
}
#ifdef TRITON_ENABLE_ENSEMBLE
else
if
(
config
.
platform
()
==
kEnsemblePlatform
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Ensemble model '"
+
config
.
name
()
+
"' must have platform type '"
+
config
.
platform
()
+
"' and empty backend type"
);
}
#endif // TRITON_ENABLE_ENSEMBLE
if
(
config
.
platform
().
empty
()
&&
config
.
backend
().
empty
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"must specify 'platform' or 'backend' for '"
+
config
.
name
()
+
"'"
);
}
// Ensure both platform and backend are referring to known backend,
// or both referring to unknown backend for user-provided backend.
if
(
GetBackendTypeFromPlatform
(
config
.
platform
())
!=
GetBackendType
(
config
.
backend
()))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected 'platform' and 'backend' pair, got:"
+
config
.
platform
()
+
", "
+
config
.
backend
());
}
if
(
config
.
max_batch_size
()
<
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"'max_batch_size' must be non-negative value for "
+
config
.
name
());
}
if
(
!
config
.
has_version_policy
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"must specify 'version policy' for "
+
config
.
name
());
}
// If dynamic batching is specified make sure the preferred batch
// sizes are positive and don't exceed maximum batch size.
if
(
config
.
has_dynamic_batching
())
{
for
(
const
auto
size
:
config
.
dynamic_batching
().
preferred_batch_size
())
{
if
(
size
<=
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"dynamic batching preferred size must be positive for "
+
config
.
name
());
}
if
(
size
>
config
.
max_batch_size
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"dynamic batching preferred size must be <= max batch size for "
+
config
.
name
());
}
}
// Priority queue is specified
const
auto
priority_levels
=
config
.
dynamic_batching
().
priority_levels
();
if
(
priority_levels
!=
0
)
{
if
((
config
.
dynamic_batching
().
default_priority_level
()
==
0
)
||
(
config
.
dynamic_batching
().
default_priority_level
()
>
priority_levels
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"default priority level must be in range [1, "
+
std
::
to_string
(
priority_levels
)
+
"] for "
+
config
.
name
());
}
for
(
const
auto
&
queue_policy
:
config
.
dynamic_batching
().
priority_queue_policy
())
{
if
((
queue_policy
.
first
==
0
)
||
(
queue_policy
.
first
>
priority_levels
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"priority queue policy must have priority level in range [1, "
+
std
::
to_string
(
priority_levels
)
+
"] for "
+
config
.
name
());
}
}
}
// preserve ordering option will conflict with priorities and delay policy
if
(
config
.
dynamic_batching
().
preserve_ordering
())
{
if
(
priority_levels
>
1
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Only one priority level is allowed when 'preserve_ordering' is "
"true for "
+
config
.
name
());
}
const
auto
&
default_policy
=
config
.
dynamic_batching
().
default_queue_policy
();
if
((
default_policy
.
default_timeout_microseconds
()
!=
0
)
&&
(
default_policy
.
timeout_action
()
==
inference
::
ModelQueuePolicy
::
DELAY
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for "
+
config
.
name
());
}
// Also need to check policy in 'priority_queue_policy'
// for single priority case
for
(
const
auto
&
policy
:
config
.
dynamic_batching
().
priority_queue_policy
())
{
if
((
policy
.
second
.
default_timeout_microseconds
()
!=
0
)
&&
(
policy
.
second
.
timeout_action
()
==
inference
::
ModelQueuePolicy
::
DELAY
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for "
+
config
.
name
());
}
}
}
}
// If sequence batching is specified make sure the control is
// specified correctly.
if
(
config
.
has_sequence_batching
())
{
const
auto
&
batcher
=
config
.
sequence_batching
();
// Check boolean controls...
std
::
string
tensor_name
;
RETURN_IF_ERROR
(
GetBooleanSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_START
,
false
/* required */
,
&
tensor_name
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
));
RETURN_IF_ERROR
(
GetBooleanSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_END
,
false
/* required */
,
&
tensor_name
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
));
RETURN_IF_ERROR
(
GetBooleanSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_READY
,
false
/* required */
,
&
tensor_name
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
));
// Check CORRID control and make sure it is one of the allowed types.
inference
::
DataType
tensor_datatype
;
RETURN_IF_ERROR
(
GetTypedSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_CORRID
,
false
/* required */
,
&
tensor_name
,
&
tensor_datatype
));
if
(
!
tensor_name
.
empty
())
{
if
((
tensor_datatype
!=
inference
::
DataType
::
TYPE_UINT64
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_INT64
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_UINT32
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_INT32
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_STRING
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected data type for control "
+
inference
::
ModelSequenceBatching_Control_Kind_Name
(
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_CORRID
)
+
" for "
+
config
.
name
()
+
". Allowed data types are TYPE_UINT64, TYPE_INT64, "
"TYPE_UINT32, "
"TYPE_INT32 and TYPE_STRING"
);
}
}
// If oldest-first strategy is enabled make sure the preferred
// batch sizes are positive and don't exceed maximum batch size.
if
(
config
.
sequence_batching
().
has_oldest
())
{
for
(
const
auto
size
:
config
.
sequence_batching
().
oldest
().
preferred_batch_size
())
{
if
(
size
<=
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"sequence batching preferred batch size must be positive for "
+
config
.
name
());
}
if
(
size
>
config
.
max_batch_size
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"sequence batching preferred batch size must be <= max batch "
"size for "
+
config
.
name
());
}
}
}
// If direct strategy is enabled make sure the minimum slot utilization is
// in range (0.0, 1.0]
if
(
config
.
sequence_batching
().
has_direct
())
{
if
((
config
.
sequence_batching
().
direct
().
minimum_slot_utilization
()
<
0.0
)
||
(
config
.
sequence_batching
().
direct
().
minimum_slot_utilization
()
>
1.0
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"sequence batching minimum slot utilization must be in range "
"(0.0, 1.0] for "
+
config
.
name
());
}
}
}
// If ensemble scheduling is specified, validate it. Otherwise,
// must validate platform and instance_group
if
(
config
.
has_ensemble_scheduling
())
{
#ifdef TRITON_ENABLE_ENSEMBLE
RETURN_IF_ERROR
(
ValidateEnsembleSchedulingConfig
(
config
));
#else
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"ensemble scheduling not supported"
);
#endif // TRITON_ENABLE_ENSEMBLE
}
#ifdef TRITON_ENABLE_ENSEMBLE
else
if
(
config
.
platform
()
==
kEnsemblePlatform
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"ensemble scheduling must be set for ensemble "
+
config
.
name
()
+
" whose platform is "
+
kEnsemblePlatform
);
}
#endif // TRITON_ENABLE_ENSEMBLE
// FIXME: DLIS-3916 - Response Cache does not yet support decoupled models
if
(
config
.
model_transaction_policy
().
decoupled
()
&&
config
.
response_cache
().
enable
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Response Cache does not currently support model "
+
config
.
name
()
+
" with 'decoupled' transaction policy. Please disable the response"
" cache."
);
}
return
Status
::
Success
;
}
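
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// A minimal dynamic-batching configuration that the checks above would
// reject, built with the generated protobuf setters for the fields this
// function reads; "example_model" is a hypothetical name:
//
//   inference::ModelConfig config;
//   config.set_name("example_model");
//   config.set_max_batch_size(8);
//   config.mutable_dynamic_batching()->add_preferred_batch_size(16);
//   // ValidateModelConfig(config, min_compute_capability) now fails with
//   // INVALID_ARG: "dynamic batching preferred size must be <= max batch
//   // size for example_model".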
Status
ValidateInstanceGroup(
    const inference::ModelConfig& config, const double min_compute_capability)
{
  // Instance group setting doesn't apply to ensemble
  if (config.has_ensemble_scheduling()) {
    return Status::Success;
  }

  if (config.instance_group().size() == 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify one or more 'instance group's for " + config.name());
  }

  // Make sure KIND_GPU instance group specifies at least one GPU and
  // doesn't specify a non-existent GPU. Make sure non-KIND_GPU does
  // not specify any GPUs.
#ifdef TRITON_ENABLE_GPU
  std::set<int> supported_gpus;
  Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
  if (!status.IsOk()) {
    return status;
  }
#endif  // TRITON_ENABLE_GPU

  for (const auto& group : config.instance_group()) {
    if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) {
      if (group.gpus().size() > 0) {
        return Status(
            Status::Code::INVALID_ARG,
            "instance group " + group.name() + " of model " + config.name() +
                " has kind KIND_MODEL but specifies one or more GPUs");
      }
    }
    else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) {
#if !defined(TRITON_ENABLE_GPU) && !defined(TRITON_ENABLE_MALI_GPU)
      return Status(
          Status::Code::INVALID_ARG,
          "instance group " + group.name() + " of model " + config.name() +
              " has kind KIND_GPU but server does not support GPUs");
#elif defined(TRITON_ENABLE_GPU)
      if (group.gpus().size() == 0) {
        if (supported_gpus.size() == 0) {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " has kind KIND_GPU but no GPUs are available");
        }
        else {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " has kind KIND_GPU but specifies no GPUs");
        }
      }

      for (const int32_t gid : group.gpus()) {
        if (supported_gpus.find(gid) == supported_gpus.end()) {
          std::string supported_gpus_str;
          for (const auto& cc : supported_gpus) {
            if (!supported_gpus_str.empty()) {
              supported_gpus_str += ", ";
            }
            supported_gpus_str += std::to_string(cc);
          }
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " specifies invalid or unsupported gpu id " +
                  std::to_string(gid) +
                  ". GPUs with at least the minimum required CUDA compute "
                  "compatibility of " +
                  std::to_string(min_compute_capability) +
                  " are: " + supported_gpus_str);
        }
      }
#endif  // ! TRITON_ENABLE_GPU && ! TRITON_ENABLE_MALI_GPU
    }
    else if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) {
      if (group.gpus().size() > 0) {
        return Status(
            Status::Code::INVALID_ARG,
            "instance group " + group.name() + " of model " + config.name() +
                " has kind KIND_CPU but specifies one or more GPUs");
      }
    }
    else {
      return Status(
          Status::Code::INTERNAL,
          "instance group " + group.name() + " of model " + config.name() +
              " has unexpected kind KIND_AUTO");
    }

    if ((config.platform() != kTensorRTPlanPlatform) &&
        !group.profile().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "instance group " + group.name() + " of model " + config.name() +
              " and platform " + config.platform() +
              " specifies profile field which is only supported for "
              "TensorRT models");
    }
    else if (!group.profile().empty()) {
      for (const auto& profile : group.profile()) {
        int profile_index;
        RETURN_IF_ERROR(GetProfileIndex(profile, &profile_index));
        if (profile_index < 0) {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " and platform " + config.platform() +
                  " specifies invalid profile " + profile +
                  ". The field should contain the string representation of a "
                  "non-negative integer.");
        }
      }
    }
  }

  return Status::Success;
}
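
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// How typical instance_group stanzas (in config.pbtxt terms) fare against the
// checks above, assuming a build with TRITON_ENABLE_GPU and GPU 0 visible:
//
//   instance_group [ { kind: KIND_GPU, count: 2, gpus: [ 0 ] } ]
//       -> accepted
//   instance_group [ { kind: KIND_GPU } ]
//       -> rejected: "has kind KIND_GPU but specifies no GPUs"
//   instance_group [ { kind: KIND_CPU, gpus: [ 0 ] } ]
//       -> rejected: "has kind KIND_CPU but specifies one or more GPUs"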
Status
ValidateModelInput(
    const inference::ModelInput& io, int32_t max_batch_size,
    const std::string& platform)
{
  RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model input "));

  if (((io.format() == inference::ModelInput::FORMAT_NHWC) ||
       (io.format() == inference::ModelInput::FORMAT_NCHW)) &&
      (io.dims_size() != 3)) {
    return Status(
        Status::Code::INVALID_ARG, "model input NHWC/NCHW require 3 dims");
  }

  if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
    return Status(
        Status::Code::INVALID_ARG,
        "shape tensors are only supported for TensorRT platform");
  }

  return Status::Success;
}

Status
CheckAllowedModelInput(
    const inference::ModelInput& io, const std::set<std::string>& allowed)
{
  if (allowed.find(io.name()) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return Status(
        Status::Code::INVALID_ARG,
        "unexpected inference input '" + io.name() +
            "', allowed inputs are: " + astr);
  }

  return Status::Success;
}
Status
ValidateModelOutput(
    const inference::ModelOutput& io, int32_t max_batch_size,
    const std::string& platform)
{
  RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model output "));

  if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
    return Status(
        Status::Code::INVALID_ARG,
        "shape tensors are only supported for TensorRT platform");
  }

  return Status::Success;
}

Status
CheckAllowedModelOutput(
    const inference::ModelOutput& io, const std::set<std::string>& allowed)
{
  if (allowed.find(io.name()) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return Status(
        Status::Code::INVALID_ARG,
        "unexpected inference output '" + io.name() +
            "', allowed outputs are: " + astr);
  }

  return Status::Success;
}
Status
ParseBoolParameter(
    const std::string& key, std::string value, bool* parsed_value)
{
  std::transform(
      value.begin(), value.end(), value.begin(),
      [](unsigned char c) { return std::tolower(c); });
  if ((value == "true") || (value == "1")) {
    *parsed_value = true;
  }
  else if ((value == "false") || (value == "0")) {
    *parsed_value = false;
  }
  else {
    return Status(
        Status::Code::INVALID_ARG,
        "failed to convert " + key + " '" + value + "' to boolean value");
  }

  return Status::Success;
}

Status
ParseLongLongParameter(
    const std::string& key, const std::string& value, int64_t* parsed_value)
{
  try {
    *parsed_value = std::stoll(value);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        "failed to convert " + key + " '" + value + "' to integral number");
  }

  return Status::Success;
}
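
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// Typical use when reading backend parameters; the 'params' map and the
// parameter names below are hypothetical:
//
//   bool enable_pinned = false;
//   RETURN_IF_ERROR(ParseBoolParameter(
//       "enable_pinned_memory", params.at("enable_pinned_memory"),
//       &enable_pinned));
//
//   int64_t pool_bytes = 0;
//   RETURN_IF_ERROR(ParseLongLongParameter(
//       "pool_byte_size", params.at("pool_byte_size"), &pool_bytes));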
Status
GetProfileIndex(const std::string& profile_name, int* profile_index)
{
  if (profile_name.empty()) {
    return Status(Status::Code::INVALID_ARG, "profile name must not be empty");
  }

  try {
    *profile_index = stoi(profile_name);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        "unable to parse '" + profile_name + "': " + ia.what());
  }

  return Status::Success;
}
namespace {

Status
CollectInt64Fields(
    google::protobuf::Message* message, const std::string& prefix,
    std::set<std::string>* int64_fields)
{
  const google::protobuf::Descriptor* desc = message->GetDescriptor();
  const google::protobuf::Reflection* refl = message->GetReflection();
  for (int i = 0; i < desc->field_count(); ++i) {
    const google::protobuf::FieldDescriptor* field = desc->field(i);
    const std::string fullname = prefix + "::" + field->name();
    switch (field->type()) {
      case google::protobuf::FieldDescriptor::TYPE_MESSAGE: {
        if (field->is_repeated()) {
          int rsize = refl->FieldSize(*message, field);
          if (rsize == 0) {
            refl->AddMessage(message, field);
          }
          rsize = refl->FieldSize(*message, field);
          for (int r = 0; r < rsize; ++r) {
            RETURN_IF_ERROR(CollectInt64Fields(
                refl->MutableRepeatedMessage(message, field, r), fullname,
                int64_fields));
          }
        }
        else {
          RETURN_IF_ERROR(CollectInt64Fields(
              refl->MutableMessage(message, field), fullname, int64_fields));
        }
      } break;

      case google::protobuf::FieldDescriptor::TYPE_INT64:
      case google::protobuf::FieldDescriptor::TYPE_UINT64:
      case google::protobuf::FieldDescriptor::TYPE_SINT64:
      case google::protobuf::FieldDescriptor::TYPE_FIXED64:
      case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
        int64_fields->insert(fullname);
        break;

      default:
        break;
    }
  }

  return Status::Success;
}

Status
ValidateModelConfigInt64()
{
  // Must initialize a dummy ModelConfig so that all fields are
  // visited.
  inference::ModelConfig config;

  std::set<std::string> int64_fields;
  RETURN_IF_ERROR(CollectInt64Fields(&config, "ModelConfig", &int64_fields));

  LOG_VERBOSE(1) << "ModelConfig 64-bit fields:";
  for (const auto& f : int64_fields) {
    LOG_VERBOSE(1) << "\t" << f;
  }

  // We expect to find exactly the following fields. If we get an
  // error from this code ModelConfig has added or removed a 64-bit
  // field and we need to adjust here and in ModelConfigToJson below.
  std::set<std::string> expected{
      "ModelConfig::input::dims",
      "ModelConfig::input::reshape::shape",
      "ModelConfig::output::dims",
      "ModelConfig::output::reshape::shape",
      "ModelConfig::version_policy::specific::versions",
      "ModelConfig::dynamic_batching::max_queue_delay_microseconds",
      "ModelConfig::dynamic_batching::default_queue_policy::default_timeout_"
      "microseconds",
      "ModelConfig::dynamic_batching::priority_queue_policy::value::default_"
      "timeout_microseconds",
      "ModelConfig::sequence_batching::direct::max_queue_delay_microseconds",
      "ModelConfig::sequence_batching::state::dims",
      "ModelConfig::sequence_batching::state::initial_state::dims",
      "ModelConfig::sequence_batching::oldest::max_queue_delay_microseconds",
      "ModelConfig::sequence_batching::max_sequence_idle_microseconds",
      "ModelConfig::ensemble_scheduling::step::model_version",
      "ModelConfig::model_warmup::inputs::value::dims",
      "ModelConfig::optimization::cuda::graph_spec::input::value::dim",
      "ModelConfig::optimization::cuda::graph_spec::graph_lower_bound::input::"
      "value::dim",
      "ModelConfig::instance_group::secondary_devices::device_id"};

  if (int64_fields != expected) {
    return Status(
        Status::Code::INTERNAL, "ModelConfig 64-bit field needs update");
  }

  return Status::Success;
}
Status
FixInt(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& io, const std::string& name)
{
  triton::common::TritonJson::Value str_value;
  if (!io.Find(name.c_str(), &str_value)) {
    return Status::Success;
  }

  std::string str;
  RETURN_IF_ERROR(str_value.AsString(&str));

  int64_t d;
  try {
    d = std::atoll(str.c_str());
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        (std::string("unable to convert '") + str + "' to integer"));
  }

  str_value.SetInt(d);

  return Status::Success;
}

Status
FixIntArray(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& io, const std::string& name)
{
  triton::common::TritonJson::Value fixed_shape_array(
      document, triton::common::TritonJson::ValueType::ARRAY);

  if (!io.Find(name.c_str())) {
    return Status::Success;
  }

  triton::common::TritonJson::Value shape_array;
  RETURN_IF_ERROR(io.MemberAsArray(name.c_str(), &shape_array));
  for (size_t i = 0; i < shape_array.ArraySize(); ++i) {
    std::string str;
    RETURN_IF_ERROR(shape_array.IndexAsString(i, &str));

    int64_t d;
    try {
      d = std::atoll(str.c_str());
    }
    catch (...) {
      return Status(
          Status::Code::INTERNAL,
          (std::string("unable to convert '") + str + "' to integer"));
    }

    RETURN_IF_ERROR(fixed_shape_array.AppendInt(d));
  }

  shape_array.Swap(fixed_shape_array);
  fixed_shape_array.Release();

  return Status::Success;
}
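
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// What FixInt/FixIntArray do to the protobuf-produced JSON, where 64-bit
// values arrive as quoted strings:
//
//   before: {"dims": ["3", "224", "224"], "max_queue_delay_microseconds": "100"}
//   after:  {"dims": [3, 224, 224], "max_queue_delay_microseconds": 100}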
Status
FixObjectArray(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& arr, const std::string& name)
{
  for (size_t i = 0; i < arr.ArraySize(); ++i) {
    triton::common::TritonJson::Value obj;
    RETURN_IF_ERROR(arr.IndexAsObject(i, &obj));
    RETURN_IF_ERROR(FixInt(document, obj, name));
  }

  return Status::Success;
}

}  // namespace
Status
ModelConfigToJson(
    const inference::ModelConfig& config, const uint32_t config_version,
    std::string* json_str)
{
  // Currently only support 'config_version' 1, which is the json
  // representation of the ModelConfig protobuf with the int64 fields
  // fixed to be actual numbers instead of the string madness done by
  // protobuf.
  if (config_version != 1) {
    return Status(
        Status::Code::INVALID_ARG,
        std::string("model configuration version ") +
            std::to_string(config_version) +
            " not supported, supported versions are: 1");
  }

  // Config will have 0 byte size if all fields are with default value,
  // in other words the config is empty.
  if (config.ByteSizeLong() == 0) {
    json_str->clear();
    return Status::Success;
  }

  std::string config_json_str;

  ::google::protobuf::util::JsonPrintOptions options;
  options.preserve_proto_field_names = true;
  options.always_print_primitive_fields = true;
  ::google::protobuf::util::MessageToJsonString(
      config, &config_json_str, options);

  // We need to verify that every 64-bit field in the
  // ModelConfig protobuf is being handled. We hardcode the known
  // fields and check just once to make sure everything has been
  // handled. We could have this check in a separately compiled CI
  // test but it is convenient to keep it here close to the code below
  // that actually fixes the 64-bit fields.
  {
    static std::once_flag fonce;
    Status status = Status::Success;
    std::call_once(fonce, [&status] { status = ValidateModelConfigInt64(); });
    RETURN_IF_ERROR(status);
  }

  // In the json produced by protobuf, int64 and uint64 values are
  // represented as strings. Protobuf doesn't provide an option to
  // disable this (sigh) so we need to fix it up here as we want the
  // json representation of the config to be reasonable json...
  triton::common::TritonJson::Value config_json;
  config_json.Parse(config_json_str);

  // Fix input::dims, input::reshape::shape, output::dims,
  // output::reshape::shape
  for (std::string name : {"input", "output"}) {
    triton::common::TritonJson::Value ios;
    RETURN_IF_ERROR(config_json.MemberAsArray(name.c_str(), &ios));
    for (size_t i = 0; i < ios.ArraySize(); ++i) {
      triton::common::TritonJson::Value io;
      RETURN_IF_ERROR(ios.IndexAsObject(i, &io));
      RETURN_IF_ERROR(FixIntArray(config_json, io, "dims"));

      triton::common::TritonJson::Value reshape;
      if (io.Find("reshape", &reshape)) {
        RETURN_IF_ERROR(FixIntArray(config_json, reshape, "shape"));
      }
    }
  }

  // Fix version_policy::specific::versions
  {
    triton::common::TritonJson::Value vp;
    if (config_json.Find("version_policy", &vp)) {
      triton::common::TritonJson::Value specific;
      if (vp.Find("specific", &specific)) {
        RETURN_IF_ERROR(FixIntArray(config_json, specific, "versions"));
      }
    }
  }

  // Fix dynamic_batching::max_queue_delay_microseconds,
  // dynamic_batching::default_queue_policy::default_timeout_microseconds,
  // dynamic_batching::priority_queue_policy::value::default_timeout_microseconds
  {
    triton::common::TritonJson::Value db;
    if (config_json.Find("dynamic_batching", &db)) {
      RETURN_IF_ERROR(FixInt(config_json, db, "max_queue_delay_microseconds"));
      triton::common::TritonJson::Value dqp;
      if (db.Find("default_queue_policy", &dqp)) {
        RETURN_IF_ERROR(
            FixInt(config_json, dqp, "default_timeout_microseconds"));
      }
      triton::common::TritonJson::Value pqp;
      if (db.Find("priority_queue_policy", &pqp)) {
        // Iterate over each member in 'pqp' and fix...
        std::vector<std::string> members;
        RETURN_IF_ERROR(pqp.Members(&members));
        for (const auto& m : members) {
          triton::common::TritonJson::Value el;
          RETURN_IF_ERROR(pqp.MemberAsObject(m.c_str(), &el));
          RETURN_IF_ERROR(
              FixInt(config_json, el, "default_timeout_microseconds"));
        }
      }
    }
  }

  // Fix sequence_batching::oldest::max_queue_delay_microseconds,
  // sequence_batching::direct::max_queue_delay_microseconds,
  // sequence_batching::max_sequence_idle_microseconds
  {
    triton::common::TritonJson::Value sb;
    if (config_json.Find("sequence_batching", &sb)) {
      RETURN_IF_ERROR(
          FixInt(config_json, sb, "max_sequence_idle_microseconds"));
      triton::common::TritonJson::Value oldest;
      if (sb.Find("oldest", &oldest)) {
        RETURN_IF_ERROR(
            FixInt(config_json, oldest, "max_queue_delay_microseconds"));
      }
      triton::common::TritonJson::Value direct;
      if (sb.Find("direct", &direct)) {
        RETURN_IF_ERROR(
            FixInt(config_json, direct, "max_queue_delay_microseconds"));
      }
      triton::common::TritonJson::Value states;
      if (sb.Find("state", &states)) {
        for (size_t i = 0; i < states.ArraySize(); ++i) {
          triton::common::TritonJson::Value state;
          RETURN_IF_ERROR(states.IndexAsObject(i, &state));
          RETURN_IF_ERROR(FixIntArray(config_json, state, "dims"));

          triton::common::TritonJson::Value initial_state;
          if (sb.Find("initial_state", &initial_state)) {
            RETURN_IF_ERROR(FixIntArray(config_json, initial_state, "dims"));
          }
        }
      }
    }
  }

  // Fix ensemble_scheduling::step::model_version.
  {
    triton::common::TritonJson::Value ens;
    if (config_json.Find("ensemble_scheduling", &ens)) {
      triton::common::TritonJson::Value step;
      if (ens.Find("step", &step)) {
        RETURN_IF_ERROR(FixObjectArray(config_json, step, "model_version"));
      }
    }
  }

  // Fix model_warmup::inputs::value::dims.
  {
    triton::common::TritonJson::Value warmups;
    if (config_json.Find("model_warmup", &warmups)) {
      for (size_t i = 0; i < warmups.ArraySize(); ++i) {
        triton::common::TritonJson::Value warmup;
        RETURN_IF_ERROR(warmups.IndexAsObject(i, &warmup));
        triton::common::TritonJson::Value inputs;
        if (warmup.Find("inputs", &inputs)) {
          std::vector<std::string> members;
          RETURN_IF_ERROR(inputs.Members(&members));
          for (const auto& m : members) {
            triton::common::TritonJson::Value input;
            RETURN_IF_ERROR(inputs.MemberAsObject(m.c_str(), &input));
            RETURN_IF_ERROR(FixIntArray(config_json, input, "dims"));
          }
        }
      }
    }
  }

  // Convert the fixed json back to the string...
  triton::common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(config_json.Write(&buffer));
  *json_str = std::move(buffer.MutableContents());

  return Status::Success;
}
Status
JsonToModelConfig(
    const std::string& json_config, const uint32_t config_version,
    inference::ModelConfig* protobuf_config)
{
  // Currently only support 'config_version' 1, which is the json
  // representation of the ModelConfig protobuf and matches the
  // representation produced by ModelConfigToJson().
  if (config_version != 1) {
    return Status(
        Status::Code::INVALID_ARG,
        std::string("model configuration version ") +
            std::to_string(config_version) +
            " not supported, supported versions are: 1");
  }

  ::google::protobuf::util::JsonParseOptions options;
  options.case_insensitive_enum_parsing = true;
  options.ignore_unknown_fields = false;
  auto err = ::google::protobuf::util::JsonStringToMessage(
      json_config, protobuf_config, options);
  if (!err.ok()) {
    return Status(
        Status::Code::INVALID_ARG, std::string(err.message()));
  }

  return Status::Success;
}
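
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// Round-tripping a configuration through the JSON helpers; error handling is
// elided and the variable names are hypothetical:
//
//   std::string json;
//   ModelConfigToJson(config, 1 /* config_version */, &json);
//   // 64-bit fields such as "max_queue_delay_microseconds" are now plain
//   // JSON numbers rather than the quoted strings protobuf emits.
//
//   inference::ModelConfig parsed;
//   JsonToModelConfig(json, 1 /* config_version */, &parsed);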
BackendType
GetBackendTypeFromPlatform(const std::string& platform_name)
{
  if ((platform_name == kTensorFlowGraphDefPlatform) ||
      (platform_name == kTensorFlowSavedModelPlatform)) {
    return BackendType::BACKEND_TYPE_TENSORFLOW;
  }

  if (platform_name == kTensorRTPlanPlatform) {
    return BackendType::BACKEND_TYPE_TENSORRT;
  }

  if (platform_name == kOnnxRuntimeOnnxPlatform) {
    return BackendType::BACKEND_TYPE_ONNXRUNTIME;
  }

  if (platform_name == kPyTorchLibTorchPlatform) {
    return BackendType::BACKEND_TYPE_PYTORCH;
  }

  return BackendType::BACKEND_TYPE_UNKNOWN;
}

/// Get the BackendType value for a backend name.
/// \param backend_name The backend name.
/// \return The BackendType or BackendType::UNKNOWN if the backend string
/// is not recognized.
BackendType
GetBackendType(const std::string& backend_name)
{
  if (backend_name == kTensorFlowBackend) {
    return BackendType::BACKEND_TYPE_TENSORFLOW;
  }

  if (backend_name == kTensorRTBackend) {
    return BackendType::BACKEND_TYPE_TENSORRT;
  }

  if (backend_name == kOnnxRuntimeBackend) {
    return BackendType::BACKEND_TYPE_ONNXRUNTIME;
  }

  if (backend_name == kPyTorchBackend) {
    return BackendType::BACKEND_TYPE_PYTORCH;
  }

  return BackendType::BACKEND_TYPE_UNKNOWN;
}
TRITONSERVER_DataType
DataTypeToTriton(const inference::DataType dtype)
{
  switch (dtype) {
    case inference::DataType::TYPE_BOOL:
      return TRITONSERVER_TYPE_BOOL;
    case inference::DataType::TYPE_UINT8:
      return TRITONSERVER_TYPE_UINT8;
    case inference::DataType::TYPE_UINT16:
      return TRITONSERVER_TYPE_UINT16;
    case inference::DataType::TYPE_UINT32:
      return TRITONSERVER_TYPE_UINT32;
    case inference::DataType::TYPE_UINT64:
      return TRITONSERVER_TYPE_UINT64;
    case inference::DataType::TYPE_INT8:
      return TRITONSERVER_TYPE_INT8;
    case inference::DataType::TYPE_INT16:
      return TRITONSERVER_TYPE_INT16;
    case inference::DataType::TYPE_INT32:
      return TRITONSERVER_TYPE_INT32;
    case inference::DataType::TYPE_INT64:
      return TRITONSERVER_TYPE_INT64;
    case inference::DataType::TYPE_FP16:
      return TRITONSERVER_TYPE_FP16;
    case inference::DataType::TYPE_FP32:
      return TRITONSERVER_TYPE_FP32;
    case inference::DataType::TYPE_FP64:
      return TRITONSERVER_TYPE_FP64;
    case inference::DataType::TYPE_STRING:
      return TRITONSERVER_TYPE_BYTES;
    case inference::DataType::TYPE_BF16:
      return TRITONSERVER_TYPE_BF16;
    default:
      break;
  }

  return TRITONSERVER_TYPE_INVALID;
}

inference::DataType
TritonToDataType(const TRITONSERVER_DataType dtype)
{
  switch (dtype) {
    case TRITONSERVER_TYPE_BOOL:
      return inference::DataType::TYPE_BOOL;
    case TRITONSERVER_TYPE_UINT8:
      return inference::DataType::TYPE_UINT8;
    case TRITONSERVER_TYPE_UINT16:
      return inference::DataType::TYPE_UINT16;
    case TRITONSERVER_TYPE_UINT32:
      return inference::DataType::TYPE_UINT32;
    case TRITONSERVER_TYPE_UINT64:
      return inference::DataType::TYPE_UINT64;
    case TRITONSERVER_TYPE_INT8:
      return inference::DataType::TYPE_INT8;
    case TRITONSERVER_TYPE_INT16:
      return inference::DataType::TYPE_INT16;
    case TRITONSERVER_TYPE_INT32:
      return inference::DataType::TYPE_INT32;
    case TRITONSERVER_TYPE_INT64:
      return inference::DataType::TYPE_INT64;
    case TRITONSERVER_TYPE_FP16:
      return inference::DataType::TYPE_FP16;
    case TRITONSERVER_TYPE_FP32:
      return inference::DataType::TYPE_FP32;
    case TRITONSERVER_TYPE_FP64:
      return inference::DataType::TYPE_FP64;
    case TRITONSERVER_TYPE_BYTES:
      return inference::DataType::TYPE_STRING;
    case TRITONSERVER_TYPE_BF16:
      return inference::DataType::TYPE_BF16;
    default:
      break;
  }

  return inference::DataType::TYPE_INVALID;
}
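
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// The two conversions are inverses, the only naming difference being the
// string type:
//
//   DataTypeToTriton(inference::DataType::TYPE_STRING) == TRITONSERVER_TYPE_BYTES
//   TritonToDataType(TRITONSERVER_TYPE_BYTES) == inference::DataType::TYPE_STRING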
}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_utils.h
deleted 100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "model_config.pb.h"
#include "status.h"
#include "triton/common/model_config.h"
#include "tritonserver_apis.h"
#include "filesystem.h"
namespace triton { namespace core {

/// Enumeration for the different backend types.
enum BackendType {
  BACKEND_TYPE_UNKNOWN = 0,
  BACKEND_TYPE_TENSORRT = 1,
  BACKEND_TYPE_TENSORFLOW = 2,
  BACKEND_TYPE_ONNXRUNTIME = 3,
  BACKEND_TYPE_PYTORCH = 4
};
/// Get version of a model from the path containing the model
/// definition file.
/// \param path The path to the model definition file.
/// \param version Returns the version.
/// \return The error status.
Status GetModelVersionFromPath(const std::string& path, int64_t* version);

/// Get the tensor name, false value, and true value for a boolean
/// sequence batcher control kind. If 'required' is true then must
/// find a tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor.
Status GetBooleanSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype, float* fp32_false_value,
    float* fp32_true_value, int32_t* int32_false_value,
    int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value);

/// Get the tensor name and datatype for a non-boolean sequence
/// batcher control kind. If 'required' is true then must find a
/// tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor. 'tensor_datatype' returns the required datatype for the
/// control.
Status GetTypedSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype);

/// Read a ModelConfig and normalize it as expected by model backends.
/// \param model_name The name of the model.
/// \param path The full-path to the directory containing the
/// model configuration.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config Returns the normalized model configuration.
/// \return The error status.
Status GetNormalizedModelConfig(
    const std::string& model_name, const std::string& path,
    const double min_compute_capability, inference::ModelConfig* config);
/// Auto-complete backend related fields (platform, backend and default model
/// filename) if not set, note that only Triton recognized backends will be
/// checked.
/// \param model_name The name of the model.
/// \param model_path The full-path to the directory containing the
/// model configuration.
/// \param config Returns the auto-completed model configuration.
/// \return The error status.
Status AutoCompleteBackendFields(
    const std::string& model_name, const std::string& model_path,
    inference::ModelConfig* config);

/// Detects and adds missing fields in the model configuration.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config The model configuration
/// \return The error status
Status NormalizeModelConfig(
    const double min_compute_capability, inference::ModelConfig* config);

/// [FIXME] better formalize config normalization / validation
/// Detects and adds missing fields in instance group setting.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config The model configuration
/// \return The error status
Status NormalizeInstanceGroup(
    const double min_compute_capability,
    const std::vector<inference::ModelInstanceGroup>& preferred_groups,
    inference::ModelConfig* config);

/// [FIXME] Remove once a more permanent solution is implemented (DLIS-4211)
/// Localize EXECUTION_ENV_PATH in python backend.
/// \param model_path The full-path to the directory containing the model
/// configuration, before localization.
/// \param config The model configuration
/// \param localized_model_dir The localized model directory
/// \return The error status
Status LocalizePythonBackendExecutionEnvironmentPath(
    const std::string& model_path, inference::ModelConfig* config,
    std::shared_ptr<LocalizedPath>* localized_model_dir);

/// Auto-complete the instance count based on instance kind and backend name.
/// \param group The instance group to set the count for.
/// \param backend The backend name to check against.
/// \return The error status.
Status SetDefaultInstanceCount(
    inference::ModelInstanceGroup* group, const std::string& backend);

/// Validate that a model is specified correctly, except for model inputs
/// and outputs. ValidateModelIOConfig() should be called to
/// validate model inputs and outputs.
/// \param config The model configuration to validate.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateModelConfig(
    const inference::ModelConfig& config, const double min_compute_capability);

/// [FIXME] better formalize config normalization / validation
/// Validate instance group setting.
/// \param config The model configuration to validate.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateInstanceGroup(
    const inference::ModelConfig& config, const double min_compute_capability);

/// Validate that a model's inputs and outputs are specified correctly.
/// \param config The model configuration to validate.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateModelIOConfig(const inference::ModelConfig& config);
/// Validate that an input is specified correctly in a model
/// configuration.
/// \param io The model input.
/// \param max_batch_size The max batch size specified in model configuration.
/// \param platform The platform name
/// \return The error status. A non-OK status indicates the input
/// is not valid.
Status ValidateModelInput(
    const inference::ModelInput& io, int32_t max_batch_size,
    const std::string& platform);

/// Validate that an input matches one of the allowed input names.
/// \param io The model input.
/// \param allowed The set of allowed input names.
/// \return The error status. A non-OK status indicates the input
/// is not valid.
Status CheckAllowedModelInput(
    const inference::ModelInput& io, const std::set<std::string>& allowed);

/// Validate that an output is specified correctly in a model
/// configuration.
/// \param io The model output.
/// \param max_batch_size The max batch size specified in model configuration.
/// \param platform The platform name
/// \return The error status. A non-OK status indicates the output
/// is not valid.
Status ValidateModelOutput(
    const inference::ModelOutput& io, int32_t max_batch_size,
    const std::string& platform);

/// Validate that an output matches one of the allowed output names.
/// \param io The model output.
/// \param allowed The set of allowed output names.
/// \return The error status. A non-OK status indicates the output
/// is not valid.
Status CheckAllowedModelOutput(
    const inference::ModelOutput& io, const std::set<std::string>& allowed);

/// Validate that a model's batch inputs and batch outputs are specified
/// correctly.
/// \param config The model configuration to validate.
/// \return The error status. A non-OK status indicates the batch inputs or
/// batch outputs are not valid.
Status ValidateBatchIO(const inference::ModelConfig& config);

/// Parse the 'value' of the parameter 'key' into a boolean value.
/// \param key The name of the parameter.
/// \param value The value of the parameter in string.
/// \param parsed_value Return the boolean of the parameter.
/// \return The error status. A non-OK status indicates failure on parsing the
/// value.
Status ParseBoolParameter(
    const std::string& key, std::string value, bool* parsed_value);

/// Parse the 'value' of the parameter 'key' into a long long integer value.
/// \param key The name of the parameter.
/// \param value The value of the parameter in string.
/// \param parsed_value Return the numerical value of the parameter.
/// \return The error status. A non-OK status indicates failure on parsing the
/// value.
Status ParseLongLongParameter(
    const std::string& key, const std::string& value, int64_t* parsed_value);

/// Obtain the 'profile_index' of the 'profile_name'.
/// \param profile_name The name of the profile.
/// \param profile_index Return the index of the profile.
/// \return The error status. A non-OK status indicates failure on getting the
/// value.
Status GetProfileIndex(const std::string& profile_name, int* profile_index);

/// Convert a model configuration protobuf to the equivalent json.
/// \param config The protobuf model configuration.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned.
/// \param json_str Returns the equivalent JSON.
/// \return The error status.
Status ModelConfigToJson(
    const inference::ModelConfig& config, const uint32_t config_version,
    std::string* json_str);

/// Convert a model configuration JSON to the equivalent protobuf.
/// \param json_config The JSON model configuration.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned.
/// \param protobuf_config Returns the equivalent protobuf.
/// \return The error status.
Status JsonToModelConfig(
    const std::string& json_config, const uint32_t config_version,
    inference::ModelConfig* protobuf_config);

/// Get the BackendType value for a platform name.
/// \param platform_name The platform name.
/// \return The BackendType or BackendType::UNKNOWN if the platform string
/// is not recognized.
BackendType GetBackendTypeFromPlatform(const std::string& platform_name);

/// Get the BackendType value for a backend name.
/// \param backend_name The backend name.
/// \return The BackendType or BackendType::UNKNOWN if the backend string
/// is not recognized.
BackendType GetBackendType(const std::string& backend_name);

/// Get the Triton server data type corresponding to a data type.
/// \param dtype The data type.
/// \return The Triton server data type.
TRITONSERVER_DataType DataTypeToTriton(const inference::DataType dtype);

/// Get the data type corresponding to a Triton server data type.
/// \param dtype The Triton server data type.
/// \return The data type.
inference::DataType TritonToDataType(const TRITONSERVER_DataType dtype);

}}  // namespace triton::core
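
// --- Illustrative sketch (editor's addition, not part of the original header) ---
// Typical load-time use of these helpers, with error handling elided;
// 'model_name', 'model_path' and 'min_cc' are placeholders:
//
//   inference::ModelConfig config;
//   GetNormalizedModelConfig(model_name, model_path, min_cc, &config);
//   ValidateModelConfig(config, min_cc);
//   ValidateModelIOConfig(config);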