// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "data_loader.h"
#include "gmock/gmock.h"
namespace triton { namespace perfanalyzer {
/// Mock DataLoader class used for testing to allow JSON data to be read
/// from a string rather than from a file.
///
class NaggyMockDataLoader : public DataLoader {
public:
NaggyMockDataLoader() { SetupMocks(); }
NaggyMockDataLoader(size_t batch_size) : DataLoader(batch_size)
{
SetupMocks();
}
void SetupMocks()
{
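// Delegate each mocked method to the real DataLoader implementation by
// default, so the mock behaves like the real class unless a test installs
// its own expectation.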
ON_CALL(*this, GetTotalSteps(testing::_))
.WillByDefault([this](size_t stream_id) -> size_t {
return this->DataLoader::GetTotalSteps(stream_id);
});
ON_CALL(*this, ReadFile(testing::_, testing::_))
.WillByDefault(
[this](
const std::string& path,
std::vector<char>* contents) -> cb::Error {
return this->DataLoader::ReadFile(path, contents);
});
ON_CALL(*this, ReadTextFile(testing::_, testing::_))
.WillByDefault(
[this](
const std::string& path,
std::vector<std::string>* contents) -> cb::Error {
return this->DataLoader::ReadTextFile(path, contents);
});
}
MOCK_METHOD(size_t, GetTotalSteps, (size_t), (override));
MOCK_METHOD(cb::Error, ReadFile, (const std::string&, std::vector<char>*));
MOCK_METHOD(
cb::Error, ReadTextFile, (const std::string&, std::vector<std::string>*));
cb::Error ReadDataFromJSON(
const std::shared_ptr<ModelTensorMap>& inputs,
const std::shared_ptr<ModelTensorMap>& outputs,
const std::string& json_file) override
{
return ReadDataFromStr(json_file, inputs, outputs);
}
cb::Error ReadDataFromStr(
const std::string& str, const std::shared_ptr<ModelTensorMap>& inputs,
const std::shared_ptr<ModelTensorMap>& outputs)
{
rapidjson::Document d{};
const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag;
d.Parse<parseFlags>(str.c_str());
return ParseData(d, inputs, outputs);
};
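// Expose base DataLoader state to tests via references.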
std::vector<size_t>& step_num_{DataLoader::step_num_};
size_t& data_stream_cnt_{DataLoader::data_stream_cnt_};
};
// Non-naggy version of Mock Data Loader (won't warn when using default gmock
// mocked function)
using MockDataLoader = testing::NiceMock<NaggyMockDataLoader>;
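// Usage sketch (illustrative only, not part of the original header): a test
// can feed inline JSON through the mock instead of pointing it at a file.
// The JSON payload below is a placeholder; the real schema is whatever
// DataLoader::ParseData expects for the model's inputs.
inline cb::Error
ExampleReadInlineJson(
    const std::shared_ptr<ModelTensorMap>& inputs,
    const std::shared_ptr<ModelTensorMap>& outputs)
{
  MockDataLoader loader;
  const std::string json{R"({"data": [{"INPUT0": [1]}]})"};
  return loader.ReadDataFromStr(json, inputs, outputs);
}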
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "infer_context.h"
namespace triton { namespace perfanalyzer {
class NaggyMockInferContext : public InferContext {
public:
NaggyMockInferContext()
{
ON_CALL(*this, SendRequest(testing::_, testing::_, testing::_))
.WillByDefault(
[this](
const uint64_t request_id, const bool delayed,
const uint64_t sequence_id) -> void {
this->InferContext::SendRequest(request_id, delayed, sequence_id);
});
}
MOCK_METHOD(
void, SendRequest, (const uint64_t, const bool, const uint64_t),
(override));
std::shared_ptr<SequenceManager>& sequence_manager_{
InferContext::sequence_manager_};
std::shared_ptr<DataLoader>& data_loader_{InferContext::data_loader_};
std::shared_ptr<IInferDataManager>& infer_data_manager_{
InferContext::infer_data_manager_};
std::shared_ptr<ThreadStat>& thread_stat_{InferContext::thread_stat_};
std::reference_wrapper<const bool>& execute_{InferContext::execute_};
bool& using_json_data_{InferContext::using_json_data_};
bool& async_{InferContext::async_};
bool& streaming_{InferContext::streaming_};
InferData& infer_data_{InferContext::infer_data_};
std::unique_ptr<cb::ClientBackend>& infer_backend_{
InferContext::infer_backend_};
std::function<void(cb::InferResult*)>& async_callback_func_{
InferContext::async_callback_func_};
};
using MockInferContext = testing::NiceMock<NaggyMockInferContext>;
}} // namespace triton::perfanalyzer
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "infer_data_manager.h"
#include "infer_data_manager_shm.h"
#include "mock_client_backend.h"
namespace triton { namespace perfanalyzer {
class MockInferDataManagerShm : public InferDataManagerShm {
public:
MockInferDataManagerShm(
const int32_t batch_size, const SharedMemoryType shared_memory_type,
const size_t output_shm_size, const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
const std::shared_ptr<DataLoader>& data_loader)
: InferDataManagerShm(
batch_size, shared_memory_type, output_shm_size, parser, factory,
data_loader)
{
}
// Mocked version of the base class's CopySharedMemory method.
// Tracks the mapping of shared memory region name to data.
//
cb::Error CopySharedMemory(
uint8_t* input_shm_ptr, const std::vector<TensorData>& input_datas,
bool is_shape_tensor, std::string& region_name) override
{
std::vector<int32_t> vals;
for (size_t i = 0; i < input_datas.size(); i++) {
int32_t val = *reinterpret_cast<const int32_t*>(input_datas[i].data_ptr);
vals.push_back(val);
}
mocked_shared_memory_regions.insert(std::make_pair(region_name, vals));
return cb::Error::Success;
}
cb::Error CreateInferInput(
cb::InferInput** infer_input, const cb::BackendKind kind,
const std::string& name, const std::vector<int64_t>& dims,
const std::string& datatype) override
{
*infer_input = new cb::MockInferInput(kind, name, dims, datatype);
return cb::Error::Success;
}
// Tracks the mapping of shared memory label to data
std::map<std::string, std::vector<int32_t>> mocked_shared_memory_regions;
};
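// Illustrative helper (not part of the original header): shows how a test
// could inspect the tracked shared-memory regions after CopySharedMemory has
// run on a previously constructed MockInferDataManagerShm.
inline bool
MockRegionHoldsValue(
    const MockInferDataManagerShm& manager, const std::string& region_name,
    int32_t expected)
{
  const auto itr = manager.mocked_shared_memory_regions.find(region_name);
  if (itr == manager.mocked_shared_memory_regions.end()) {
    return false;
  }
  for (const int32_t val : itr->second) {
    if (val == expected) {
      return true;
    }
  }
  return false;
}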
class MockInferDataManager : public InferDataManager {
public:
MockInferDataManager() { SetupMocks(); }
MockInferDataManager(
const size_t max_threads, const int32_t batch_size,
const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
const std::shared_ptr<DataLoader>& data_loader)
: InferDataManager(max_threads, batch_size, parser, factory, data_loader)
{
SetupMocks();
}
void SetupMocks()
{
ON_CALL(
*this, UpdateInferData(testing::_, testing::_, testing::_, testing::_))
.WillByDefault(
[this](
size_t thread_id, int stream_index, int step_index,
InferData& infer_data) -> cb::Error {
return this->InferDataManager::UpdateInferData(
thread_id, stream_index, step_index, infer_data);
});
}
MOCK_METHOD(
cb::Error, UpdateInferData, (size_t, int, int, InferData&), (override));
cb::Error CreateInferInput(
cb::InferInput** infer_input, const cb::BackendKind kind,
const std::string& name, const std::vector<int64_t>& dims,
const std::string& datatype) override
{
*infer_input = new cb::MockInferInput(kind, name, dims, datatype);
return cb::Error::Success;
}
};
class MockInferDataManagerFactory {
public:
static std::shared_ptr<IInferDataManager> CreateMockInferDataManager(
const size_t max_threads, const int32_t batch_size,
const SharedMemoryType shared_memory_type, const size_t output_shm_size,
const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
const std::shared_ptr<DataLoader>& data_loader)
{
if (shared_memory_type == SharedMemoryType::NO_SHARED_MEMORY) {
return std::make_shared<testing::NiceMock<MockInferDataManager>>(
max_threads, batch_size, parser, factory, data_loader);
} else {
return std::make_shared<testing::NiceMock<MockInferDataManagerShm>>(
batch_size, shared_memory_type, output_shm_size, parser, factory,
data_loader);
}
}
};
}} // namespace triton::perfanalyzer
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "inference_profiler.h"
namespace triton { namespace perfanalyzer {
class NaggyMockInferenceProfiler : public InferenceProfiler {
public:
NaggyMockInferenceProfiler()
{
ON_CALL(
*this, ValidLatencyMeasurement(
testing::_, testing::_, testing::_, testing::_, testing::_,
testing::_))
.WillByDefault(
[this](
const std::pair<uint64_t, uint64_t>& valid_range,
size_t& valid_sequence_count, size_t& delayed_request_count,
std::vector<uint64_t>* latencies, size_t& response_count,
std::vector<RequestRecord>& valid_requests) -> void {
this->InferenceProfiler::ValidLatencyMeasurement(
valid_range, valid_sequence_count, delayed_request_count,
latencies, response_count, valid_requests);
});
ON_CALL(*this, SummarizeLatency(testing::_, testing::_))
.WillByDefault(
[this](
const std::vector<uint64_t>& latencies,
PerfStatus& summary) -> cb::Error {
return this->InferenceProfiler::SummarizeLatency(
latencies, summary);
});
ON_CALL(*this, MergePerfStatusReports(testing::_, testing::_))
.WillByDefault(
[this](
std::deque<PerfStatus>& perf_status,
PerfStatus& summary_status) -> cb::Error {
return this->InferenceProfiler::MergePerfStatusReports(
perf_status, summary_status);
});
ON_CALL(*this, MergeServerSideStats(testing::_, testing::_))
.WillByDefault(
[this](
std::vector<ServerSideStats>& server_side_stats,
ServerSideStats& server_side_summary) -> cb::Error {
return this->InferenceProfiler::MergeServerSideStats(
server_side_stats, server_side_summary);
});
ON_CALL(
*this, SummarizeClientStat(
testing::_, testing::_, testing::_, testing::_, testing::_,
testing::_, testing::_, testing::_))
.WillByDefault(
[this](
const cb::InferStat& start_stat, const cb::InferStat& end_stat,
const uint64_t duration_ns, const size_t valid_request_count,
const size_t delayed_request_count,
const size_t valid_sequence_count, const size_t response_count,
PerfStatus& summary) -> cb::Error {
return this->InferenceProfiler::SummarizeClientStat(
start_stat, end_stat, duration_ns, valid_request_count,
delayed_request_count, valid_sequence_count, response_count,
summary);
});
};
MOCK_METHOD0(IncludeServerStats, bool());
MOCK_METHOD(
void, ValidLatencyMeasurement,
((const std::pair<uint64_t, uint64_t>&), size_t&, size_t&,
std::vector<uint64_t>*, size_t&, std::vector<RequestRecord>&),
(override));
MOCK_METHOD(
cb::Error, SummarizeLatency, (const std::vector<uint64_t>&, PerfStatus&),
(override));
MOCK_METHOD(
cb::Error, MergePerfStatusReports, (std::deque<PerfStatus>&, PerfStatus&),
(override));
MOCK_METHOD(
cb::Error, MergeServerSideStats,
(std::vector<ServerSideStats>&, ServerSideStats&), (override));
MOCK_METHOD(
cb::Error, SummarizeClientStat,
(const cb::InferStat&, const cb::InferStat&, const uint64_t, const size_t,
const size_t, const size_t, const size_t, PerfStatus&),
(override));
std::shared_ptr<ModelParser>& parser_{InferenceProfiler::parser_};
std::unique_ptr<LoadManager>& manager_{InferenceProfiler::manager_};
bool& include_lib_stats_{InferenceProfiler::include_lib_stats_};
std::vector<RequestRecord>& all_request_records_{
InferenceProfiler::all_request_records_};
};
using MockInferenceProfiler = testing::NiceMock<NaggyMockInferenceProfiler>;
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "load_manager.h"
namespace triton { namespace perfanalyzer {
class NaggyMockLoadManager : public LoadManager {};
using MockLoadManager = testing::NiceMock<NaggyMockLoadManager>;
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "model_parser.h"
namespace triton { namespace perfanalyzer {
class MockModelParser : public ModelParser {
public:
MockModelParser() : ModelParser(clientbackend::BackendKind::TRITON) {}
MockModelParser(
bool is_sequence_model, bool is_decoupled_model,
size_t max_batch_size = 64)
: ModelParser(clientbackend::BackendKind::TRITON)
{
if (is_sequence_model) {
scheduler_type_ = ModelParser::SEQUENCE;
}
is_decoupled_ = is_decoupled_model;
max_batch_size_ = max_batch_size;
}
// Expose private function
cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value)
{
return ModelParser::GetInt(value, integer_value);
}
// Expose private function
cb::Error DetermineComposingModelMap(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
return ModelParser::DetermineComposingModelMap(
bls_composing_models, config, backend);
}
// Expose private function
cb::Error DetermineSchedulerType(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
return ModelParser::DetermineSchedulerType(config, backend);
}
std::shared_ptr<ComposingModelMap>& composing_models_map_{
ModelParser::composing_models_map_};
std::shared_ptr<ModelTensorMap>& inputs_{ModelParser::inputs_};
};
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "profile_data_collector.h"
namespace triton { namespace perfanalyzer {
class NaggyMockProfileDataCollector : public ProfileDataCollector {
public:
NaggyMockProfileDataCollector()
{
ON_CALL(*this, FindExperiment(testing::_))
.WillByDefault(
[this](InferenceLoadMode& id) -> std::vector<Experiment>::iterator {
return this->ProfileDataCollector::FindExperiment(id);
});
}
MOCK_METHOD(
std::vector<Experiment>::iterator, FindExperiment, (InferenceLoadMode&),
(override));
std::vector<Experiment>& experiments_{ProfileDataCollector::experiments_};
};
using MockProfileDataCollector =
testing::NiceMock<NaggyMockProfileDataCollector>;
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS"" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "profile_data_exporter.h"
namespace triton { namespace perfanalyzer {
class NaggyMockProfileDataExporter : public ProfileDataExporter {
public:
NaggyMockProfileDataExporter()
{
ON_CALL(*this, ConvertToJson(testing::_, testing::_))
.WillByDefault(
[this](
const std::vector<Experiment>& raw_experiments,
std::string& raw_version) -> void {
return this->ProfileDataExporter::ConvertToJson(
raw_experiments, raw_version);
});
ON_CALL(*this, OutputToFile(testing::_))
.WillByDefault([this](std::string& file_path) -> void {
this->ProfileDataExporter::OutputToFile(file_path);
});
ON_CALL(*this, AddExperiment(testing::_, testing::_, testing::_))
.WillByDefault(
[this](
rapidjson::Value& entry, rapidjson::Value& experiment,
const Experiment& raw_experiment) -> void {
this->ProfileDataExporter::AddExperiment(
entry, experiment, raw_experiment);
});
}
MOCK_METHOD(
void, ConvertToJson, (const std::vector<Experiment>&, std::string&),
(override));
MOCK_METHOD(
void, AddExperiment,
(rapidjson::Value&, rapidjson::Value&, const Experiment&), (override));
MOCK_METHOD(void, OutputToFile, (std::string&), (override));
rapidjson::Document& document_{ProfileDataExporter::document_};
};
using MockProfileDataExporter = testing::NiceMock<NaggyMockProfileDataExporter>;
}} // namespace triton::perfanalyzer
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "request_rate_worker.h"
namespace triton { namespace perfanalyzer {
class NaggyMockRequestRateWorker : public RequestRateWorker {
public:
NaggyMockRequestRateWorker(
uint32_t id, std::shared_ptr<ThreadStat> thread_stat,
std::shared_ptr<ThreadConfig> thread_config,
const std::shared_ptr<ModelParser> parser,
std::shared_ptr<DataLoader> data_loader,
const std::shared_ptr<cb::ClientBackendFactory> factory,
const bool on_sequence_model, const bool async, const size_t max_threads,
const bool using_json_data, const bool streaming,
const int32_t batch_size, std::condition_variable& wake_signal,
std::mutex& wake_mutex, bool& execute,
std::chrono::steady_clock::time_point& start_time,
const bool serial_sequences,
const std::shared_ptr<IInferDataManager>& infer_data_manager,
std::shared_ptr<SequenceManager> sequence_manager)
: RequestRateWorker(
id, thread_stat, thread_config, parser, data_loader, factory,
on_sequence_model, async, max_threads, using_json_data, streaming,
batch_size, wake_signal, wake_mutex, execute, start_time,
serial_sequences, infer_data_manager, sequence_manager)
{
ON_CALL(*this, Infer()).WillByDefault([this]() -> void {
RequestRateWorker::Infer();
});
}
MOCK_METHOD(void, Infer, (), (override));
void CreateContext() override { RequestRateWorker::CreateContext(); }
void SendInferRequest()
{
if (thread_stat_->status_.IsOk()) {
LoadWorker::SendInferRequest(0, false);
}
}
void EmptyInfer() { thread_config_->is_paused_ = true; }
};
// Non-naggy version of Mock (won't warn when using default gmock
// mocked function)
using MockRequestRateWorker = testing::NiceMock<NaggyMockRequestRateWorker>;
}} // namespace triton::perfanalyzer
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "sequence_manager.h"
namespace triton { namespace perfanalyzer {
class NaggyMockSequenceManager : public SequenceManager {
public:
NaggyMockSequenceManager() { SetupMocks(); }
NaggyMockSequenceManager(
const uint64_t start_sequence_id, const uint64_t sequence_id_range,
const size_t sequence_length, const bool sequence_length_specified,
const double sequence_length_variation, const bool using_json_data,
std::shared_ptr<DataLoader> data_loader)
: SequenceManager(
start_sequence_id, sequence_id_range, sequence_length,
sequence_length_specified, sequence_length_variation,
using_json_data, data_loader)
{
SetupMocks();
}
void SetupMocks()
{
ON_CALL(*this, SetInferSequenceOptions(testing::_, testing::_))
.WillByDefault([this](
const uint32_t seq_stat_index,
std::unique_ptr<cb::InferOptions>& options) {
this->SequenceManager::SetInferSequenceOptions(
seq_stat_index, options);
});
ON_CALL(*this, InitNewSequence(testing::_))
.WillByDefault([this](int seq_stat_index) {
this->SequenceManager::InitNewSequence(seq_stat_index);
});
ON_CALL(*this, GetNextSeqId(testing::_))
.WillByDefault([this](int seq_stat_index) -> uint64_t {
return this->SequenceManager::GetNextSeqId(seq_stat_index);
});
ON_CALL(*this, GetRandomSequenceLength(testing::_))
.WillByDefault([this](double offset_ratio) -> size_t {
return this->SequenceManager::GetRandomSequenceLength(offset_ratio);
});
ON_CALL(*this, GetNewDataStreamId()).WillByDefault([this]() -> size_t {
return this->SequenceManager::GetNewDataStreamId();
});
}
MOCK_METHOD(
void, SetInferSequenceOptions,
(const uint32_t, std::unique_ptr<cb::InferOptions>&), (override));
MOCK_METHOD(void, InitNewSequence, (int), (override));
MOCK_METHOD(uint64_t, GetNextSeqId, (int), (override));
MOCK_METHOD(size_t, GetRandomSequenceLength, (double), (override));
MOCK_METHOD(uint64_t, GetNewDataStreamId, (), (override));
std::vector<std::shared_ptr<SequenceStatus>>& sequence_statuses_{
SequenceManager::sequence_statuses_};
std::atomic<uint64_t>& curr_seq_id_{SequenceManager::curr_seq_id_};
};
using MockSequenceManager = testing::NiceMock<NaggyMockSequenceManager>;
}} // namespace triton::perfanalyzer
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_parser.h"
#include "rapidjson/writer.h"
namespace triton { namespace perfanalyzer {
cb::Error
ModelParser::InitTriton(
const rapidjson::Document& metadata, const rapidjson::Document& config,
const std::string& model_version,
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend)
{
model_name_ = metadata["name"].GetString();
model_version_ = model_version;
RETURN_IF_ERROR(
DetermineComposingModelMap(bls_composing_models, config, backend));
RETURN_IF_ERROR(DetermineSchedulerType(config, backend));
max_batch_size_ = 0;
const auto bs_itr = config.FindMember("max_batch_size");
if (bs_itr != config.MemberEnd()) {
int64_t mbs;
RETURN_IF_ERROR(GetInt(bs_itr->value, &mbs));
max_batch_size_ = mbs;
}
const auto txn_itr = config.FindMember("model_transaction_policy");
if (txn_itr != config.MemberEnd()) {
is_decoupled_ = txn_itr->value["decoupled"].GetBool();
}
// Get the information about inputs from metadata
const auto inputs_itr = metadata.FindMember("inputs");
if (inputs_itr != metadata.MemberEnd()) {
for (const auto& input : inputs_itr->value.GetArray()) {
auto it =
inputs_->emplace(input["name"].GetString(), ModelTensor()).first;
it->second.name_ = input["name"].GetString();
it->second.datatype_ = input["datatype"].GetString();
bool is_dynamic = false;
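// When the model supports batching (max_batch_size_ > 0), the first
// dimension reported in the metadata shape is the batch dimension, so it
// is skipped and not stored as part of the tensor shape.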
bool skip = (max_batch_size_ > 0);
for (const auto& dim : input["shape"].GetArray()) {
if (skip) {
skip = false;
continue;
}
int64_t dim_int;
RETURN_IF_ERROR(GetInt(dim, &dim_int));
if (dim_int == -1) {
is_dynamic = true;
}
it->second.shape_.push_back(dim_int);
}
if (is_dynamic) {
const auto user_shape_it = input_shapes.find(it->second.name_);
if (user_shape_it != input_shapes.end()) {
// Update the default shape to be used.
it->second.shape_.clear();
for (const auto dim : user_shape_it->second) {
it->second.shape_.push_back(dim);
}
}
}
}
}
// Check whether the tensor is shape tensor or not from config.
const auto inputs_config_itr = config.FindMember("input");
if (inputs_config_itr != config.MemberEnd()) {
for (const auto& input_config : inputs_config_itr->value.GetArray()) {
const auto name = std::string(
input_config["name"].GetString(),
input_config["name"].GetStringLength());
auto it = inputs_->find(name);
if (it == inputs_->end()) {
return cb::Error(
"no metadata found for input tensor " + name, pa::GENERIC_ERROR);
}
const auto& shape_tensor_itr = input_config.FindMember("is_shape_tensor");
if (shape_tensor_itr != input_config.MemberEnd()) {
it->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool();
}
if (input_config.HasMember("optional")) {
it->second.is_optional_ = input_config["optional"].GetBool();
} else {
it->second.is_optional_ = false;
}
}
}
// Get the information about outputs from metadata
const auto outputs_itr = metadata.FindMember("outputs");
if (outputs_itr != metadata.MemberEnd()) {
for (const auto& output : outputs_itr->value.GetArray()) {
auto it =
outputs_->emplace(output["name"].GetString(), ModelTensor()).first;
it->second.name_ = output["name"].GetString();
it->second.datatype_ = output["datatype"].GetString();
bool skip = (max_batch_size_ > 0);
for (const auto& dim : output["shape"].GetArray()) {
if (skip) {
skip = false;
continue;
}
int64_t dim_int;
RETURN_IF_ERROR(GetInt(dim, &dim_int));
it->second.shape_.push_back(dim_int);
}
}
}
// Check whether the tensor is shape tensor or not from config.
const auto output_config_itr = config.FindMember("output");
if (output_config_itr != config.MemberEnd()) {
for (const auto& output_config : output_config_itr->value.GetArray()) {
const auto name = std::string(
output_config["name"].GetString(),
output_config["name"].GetStringLength());
auto itr = outputs_->find(name);
if (itr == outputs_->end()) {
return cb::Error(
"no metadata found for output tensor " + name, pa::GENERIC_ERROR);
}
const auto& shape_tensor_itr =
output_config.FindMember("is_shape_tensor");
if (shape_tensor_itr != output_config.MemberEnd()) {
itr->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool();
}
}
}
// Check if model has response caching enabled
const auto cache_itr = config.FindMember("response_cache");
// response_cache_enabled_ set globally for reporting purposes if any
// composing model has it enabled, so don't overwrite it if already set
if (cache_itr != config.MemberEnd() && !response_cache_enabled_) {
response_cache_enabled_ = cache_itr->value["enable"].GetBool();
}
return cb::Error::Success;
}
cb::Error
ModelParser::InitTFServe(
const rapidjson::Document& metadata, const std::string& model_name,
const std::string& model_version, const std::string& model_signature_name,
const int32_t batch_size,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend)
{
model_name_ = model_name;
model_version_ = model_version;
model_signature_name_ = model_signature_name;
// Get the scheduler type for the model
scheduler_type_ = NONE;
// Will use the user provided batch size as max. Relies on the service
// to throw an error if not supported.
max_batch_size_ = batch_size;
const rapidjson::Value& signature_config =
metadata["metadata"]["signature_def"]["signature_def"];
if (!signature_config.HasMember(model_signature_name.c_str())) {
return cb::Error(
"Failed to find signature_name \"" + model_signature_name +
"\" in the metadata",
pa::GENERIC_ERROR);
}
// Get the information about inputs from metadata
if (signature_config[model_signature_name.c_str()].HasMember("inputs")) {
const rapidjson::Value& inputs =
signature_config[model_signature_name.c_str()]["inputs"];
for (rapidjson::Value::ConstMemberIterator json_itr = inputs.MemberBegin();
json_itr != inputs.MemberEnd(); ++json_itr) {
auto it =
inputs_->emplace(json_itr->name.GetString(), ModelTensor()).first;
it->second.name_ = json_itr->name.GetString();
RETURN_IF_ERROR(ConvertDTypeFromTFS(
json_itr->value["dtype"].GetString(), &it->second.datatype_));
bool is_dynamic = false;
if (json_itr->value["tensor_shape"]["unknown_rank"].GetBool()) {
if (max_batch_size_ != 0) {
return cb::Error(
"Can not specify -b flag for saved model with unknown ranked "
"inputs",
pa::GENERIC_ERROR);
}
is_dynamic = true;
} else {
bool first_dim = true;
for (const auto& dim :
json_itr->value["tensor_shape"]["dim"].GetArray()) {
int64_t dim_int;
RETURN_IF_ERROR(GetInt(dim["size"], &dim_int));
if (first_dim && (max_batch_size_ != 0)) {
if (dim_int != -1) {
return cb::Error(
"Can not specify -b flag for saved model with input not "
"having their first dim as -1",
pa::GENERIC_ERROR);
}
first_dim = false;
} else {
if (dim_int == -1) {
is_dynamic = true;
}
it->second.shape_.push_back(dim_int);
}
}
}
if (is_dynamic) {
const auto user_shape_it = input_shapes.find(it->second.name_);
if (user_shape_it != input_shapes.end()) {
// Update the default shape to be used.
it->second.shape_.clear();
for (const auto dim : user_shape_it->second) {
it->second.shape_.push_back(dim);
}
}
}
}
}
// Will not extract information about the outputs, as by default TensorFlow
// Serving returns all output tensors if none are requested.
// See here
// https://github.com/tensorflow/serving/blob/2.3.0/tensorflow_serving/apis/predict.proto#L27
return cb::Error::Success;
}
cb::Error
ModelParser::InitTorchServe(
const std::string& model_name, const std::string& model_version,
const int32_t batch_size)
{
// TorchServe does not return model metadata, hence we cannot obtain any
// parameters.
model_name_ = model_name;
model_version_ = model_version;
max_batch_size_ = batch_size;
// TorchServe needs to upload a file to the server. The input holds the
// path to the file, which should be provided as JSON to --input-data.
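// For illustration, the --input-data JSON is expected to look roughly like
// the following (exact schema per the perf_analyzer input-data docs):
//   {"data": [{"TORCHSERVE_INPUT": ["path/to/file"]}]}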
auto it = inputs_->emplace("TORCHSERVE_INPUT", ModelTensor()).first;
it->second.name_ = "TORCHSERVE_INPUT";
it->second.datatype_ = "BYTES";
// Supports only a single input file
it->second.shape_.push_back(1);
return cb::Error::Success;
}
cb::Error
ModelParser::DetermineComposingModelMap(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
RETURN_IF_ERROR(AddBLSComposingModels(bls_composing_models, config, backend));
RETURN_IF_ERROR(AddEnsembleComposingModels(config, backend));
return cb::Error::Success;
}
cb::Error
ModelParser::AddBLSComposingModels(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
for (auto model : bls_composing_models) {
(*composing_models_map_)[config["name"].GetString()].insert(model);
rapidjson::Document composing_model_config;
RETURN_IF_ERROR(backend->ModelConfig(
&composing_model_config, model.first, model.second));
RETURN_IF_ERROR(
AddEnsembleComposingModels(composing_model_config, backend));
}
return cb::Error::Success;
}
cb::Error
ModelParser::AddEnsembleComposingModels(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
if (config.HasMember("platform") &&
std::string(config["platform"].GetString()).compare("ensemble") == 0) {
const auto step_itr = config["ensemble_scheduling"].FindMember("step");
for (const auto& step : step_itr->value.GetArray()) {
std::string step_model_version;
int64_t model_version_int;
RETURN_IF_ERROR(GetInt(step["model_version"], &model_version_int));
if (model_version_int == -1) {
step_model_version = "";
} else {
step_model_version = std::to_string(model_version_int);
}
(*composing_models_map_)[config["name"].GetString()].emplace(
std::string(step["model_name"].GetString()), step_model_version);
rapidjson::Document composing_model_config;
RETURN_IF_ERROR(backend->ModelConfig(
&composing_model_config, step["model_name"].GetString(),
step_model_version));
RETURN_IF_ERROR(
AddEnsembleComposingModels(composing_model_config, backend));
}
}
return cb::Error::Success;
}
cb::Error
ModelParser::DetermineSchedulerType(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
scheduler_type_ = NONE;
if (composing_models_map_->size() != 0) {
bool is_sequential = false;
RETURN_IF_ERROR(GetComposingSchedulerType(backend, &is_sequential));
if (is_sequential) {
scheduler_type_ = ENSEMBLE_SEQUENCE;
} else {
scheduler_type_ = ENSEMBLE;
}
} else {
const auto& sequence_itr = config.FindMember("sequence_batching");
if (sequence_itr != config.MemberEnd()) {
scheduler_type_ = SEQUENCE;
} else {
const auto& dynamic_itr = config.FindMember("dynamic_batching");
if (dynamic_itr != config.MemberEnd()) {
scheduler_type_ = DYNAMIC;
}
}
}
return cb::Error::Success;
}
cb::Error
ModelParser::GetComposingSchedulerType(
std::unique_ptr<cb::ClientBackend>& backend, bool* is_sequential)
{
for (auto parent_composing_models : *composing_models_map_.get()) {
auto& composing_models = parent_composing_models.second;
for (auto composing_model : composing_models) {
rapidjson::Document config;
RETURN_IF_ERROR(backend->ModelConfig(
&config, composing_model.first, composing_model.second));
const auto& sequence_itr = config.FindMember("sequence_batching");
if (sequence_itr != config.MemberEnd()) {
*is_sequential = true;
}
const auto cache_itr = config.FindMember("response_cache");
// response_cache_enabled_ set globally for reporting purposes if any
// composing model has it enabled, so don't overwrite it if already set
if (cache_itr != config.MemberEnd() && !response_cache_enabled_) {
response_cache_enabled_ = cache_itr->value["enable"].GetBool();
}
}
}
return cb::Error::Success;
}
cb::Error
ModelParser::GetInt(const rapidjson::Value& value, int64_t* integer_value)
{
if (value.IsString()) {
std::string str(value.GetString(), value.GetStringLength());
try {
*integer_value = std::stoll(str.c_str());
}
catch (...) {
return cb::Error(
std::string("unable to convert '") + str + "' to integer",
pa::GENERIC_ERROR);
}
} else if (value.IsInt64()) {
*integer_value = value.GetInt64();
} else if (value.IsInt()) {
*integer_value = value.GetInt();
} else {
return cb::Error("failed to parse the integer value", pa::GENERIC_ERROR);
}
return cb::Error::Success;
}
}} // namespace triton::perfanalyzer
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <unordered_map>
#include "client_backend/client_backend.h"
#include "perf_utils.h"
namespace triton { namespace perfanalyzer {
#ifndef DOCTEST_CONFIG_DISABLE
class TestModelParser;
class MockModelParser;
#endif
struct ModelTensor {
ModelTensor() : is_shape_tensor_(false) {}
std::string name_;
std::string datatype_;
std::vector<int64_t> shape_;
// Indicates if this tensor holds shape information for other tensors
bool is_shape_tensor_;
bool is_optional_;
};
using ModelTensorMap = std::map<std::string, ModelTensor>;
using ComposingModelMap = std::map<std::string, std::set<cb::ModelIdentifier>>;
//==============================================================================
/// ModelParser is a helper class to parse the information about the target
/// model from the metadata and configuration returned by the server.
///
/// Perf Analyzer depends upon the various properties of the model to correctly
/// generate and issue inference requests for the model. An object of this
/// class provides these necessary details.
class ModelParser {
public:
enum ModelSchedulerType {
NONE,
DYNAMIC,
SEQUENCE,
ENSEMBLE,
ENSEMBLE_SEQUENCE
};
explicit ModelParser(cb::BackendKind backend_kind)
: backend_kind_(backend_kind),
inputs_(std::make_shared<ModelTensorMap>()),
outputs_(std::make_shared<ModelTensorMap>()),
composing_models_map_(std::make_shared<ComposingModelMap>()),
scheduler_type_(NONE), max_batch_size_(0), is_decoupled_(false),
response_cache_enabled_(false)
{
}
/// Initializes the ModelParser with the metadata and config rapidjson DOM
/// for the target model obtained from Triton service
/// \param metadata The metadata of the target model.
/// \param config The config of the target model.
/// \param model_version The version of target model.
/// \param bls_composing_models A list of BLS composing model identifiers
/// \param input_shapes The user-provided default shapes which will be used
/// if a certain input has a wildcard in its dimensions.
/// \param backend The backend object.
/// \return cb::Error object indicating success or failure.
cb::Error InitTriton(
const rapidjson::Document& metadata, const rapidjson::Document& config,
const std::string& model_version,
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend);
/// Initializes the ModelParser with the metadata and config rapidjson DOM
/// for the target model obtained from TF serving service.
/// \param metadata The metadata of the target model.
/// \param model_name The name of target model.
/// \param model_version The version of target model.
/// \param model_signature_name The signature name of target model.
/// \param input_shapes The user-provided default shapes which will be used
/// if a certain input has a wildcard in its dimensions.
/// \param backend The backend object.
/// \return cb::Error object indicating success or failure.
cb::Error InitTFServe(
const rapidjson::Document& metadata, const std::string& model_name,
const std::string& model_version, const std::string& model_signature_name,
const int32_t batch_size,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend);
cb::Error InitTorchServe(
const std::string& model_name, const std::string& model_version,
const int32_t batch_size);
/// Get the name of the target model
/// \return Model name as string
const std::string& ModelName() const { return model_name_; }
/// Get the version of target model
/// \return Model version as string
const std::string& ModelVersion() const { return model_version_; }
/// Get the signature name of target model
/// \return Model signature name as string
const std::string& ModelSignatureName() const
{
return model_signature_name_;
}
/// Get the scheduler type for the model
ModelSchedulerType SchedulerType() const { return scheduler_type_; }
/// Get the max batch size supported by the model. Returns 0 if the model
/// does not support batching.
/// \return The maximum supported batch size.
size_t MaxBatchSize() const { return max_batch_size_; }
/// Returns whether or not the model is decoupled
/// \return the truth value of whether the model is decoupled
bool IsDecoupled() const { return is_decoupled_; }
/// Returns whether or not response cache is enabled for this model
/// \return the truth value of whether response cache is enabled for this
/// model
bool ResponseCacheEnabled() const { return response_cache_enabled_; }
/// Get the details about the model inputs.
/// \return The map with tensor_name and the tensor details
/// stored as key-value pair.
const std::shared_ptr<ModelTensorMap>& Inputs() { return inputs_; }
/// Get the details about the model outputs.
/// \return The map with tensor_name and the tensor details
/// stored as key-value pair.
const std::shared_ptr<ModelTensorMap>& Outputs() { return outputs_; }
/// Get the composing maps for the target model.
/// \return The pointer to the nested map describing the
/// nested flow in the target model.
const std::shared_ptr<ComposingModelMap>& GetComposingModelMap()
{
return composing_models_map_;
}
protected:
ModelSchedulerType scheduler_type_;
bool is_decoupled_;
private:
/// Populate composing_models_map_ based on any bls composing models passed in
/// via the CLI as well as any ensemble or nested ensemble models
cb::Error DetermineComposingModelMap(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
cb::Error AddBLSComposingModels(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
cb::Error AddEnsembleComposingModels(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
/// Populate scheduler_type_ based on the scheduler type of the parent model
/// as well as any composing models
cb::Error DetermineSchedulerType(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
/// Sets is_sequential to true if any of the composing models are sequential
cb::Error GetComposingSchedulerType(
std::unique_ptr<cb::ClientBackend>& backend, bool* is_sequential);
/// In the json produced by protobuf, int64 and uint64 values are
/// represented as strings. Protobuf doesn't provide an option to
/// disable this (sigh) so we need to correctly parse these fields
/// for ModelParser to receive appropriate requests.
/// \param value The rapidjson value object with the int value.
/// \param integer_value The output integer pointer.
/// \return cb::Error object indicating success or failure.
cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value);
cb::BackendKind backend_kind_;
std::shared_ptr<ModelTensorMap> inputs_;
std::shared_ptr<ModelTensorMap> outputs_;
std::shared_ptr<ComposingModelMap> composing_models_map_;
std::string model_name_;
std::string model_version_;
std::string model_signature_name_;
size_t max_batch_size_;
bool response_cache_enabled_;
#ifndef DOCTEST_CONFIG_DISABLE
friend TestModelParser;
friend MockModelParser;
public:
ModelParser() = default;
#endif
};
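// Usage sketch (illustrative only, not part of the original header): once a
// parser has been initialized through one of the Init* methods above, callers
// key their request generation off the parsed properties, e.g.:
inline bool
RequiresSequenceHandling(const ModelParser& parser)
{
  return parser.SchedulerType() == ModelParser::SEQUENCE ||
         parser.SchedulerType() == ModelParser::ENSEMBLE_SEQUENCE;
}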
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mpi_utils.h"
#include <dlfcn.h>
#include <iostream>
#include <stdexcept>
namespace triton { namespace perfanalyzer {
MPIDriver::MPIDriver(bool is_enabled) : is_enabled_(is_enabled)
{
if (is_enabled_ == false) {
return;
}
handle_ = dlopen("libmpi.so", RTLD_LAZY | RTLD_GLOBAL);
if (handle_ == nullptr) {
    throw std::runtime_error(
        "Unable to load MPI library. If you are trying to run with "
        "MPI / multiple models, check that 'libmpi.so' is on a path "
        "listed in the `LD_LIBRARY_PATH` environment variable.");
}
CheckMPIImpl();
}
bool
MPIDriver::IsMPIRun()
{
if (is_enabled_ == false) {
return false;
}
if (MPIInitialized() == false) {
throw std::runtime_error("Must call MPI_Init() before calling IsMPIRun().");
}
return MPICommSizeWorld() > 1;
}
void
MPIDriver::MPIInit(int* argc, char*** argv)
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Init)(
int*, char***){(int (*)(int*, char***))dlsym(handle_, "MPI_Init")};
if (MPI_Init == nullptr) {
throw std::runtime_error("Unable to obtain address of `MPI_Init` symbol.");
}
MPI_Init(argc, argv);
}
int
MPIDriver::MPICommSizeWorld()
{
if (is_enabled_ == false) {
return -1;
}
int world_size{1};
int (*MPI_Comm_size)(
void*, int*){(int (*)(void*, int*))dlsym(handle_, "MPI_Comm_size")};
if (MPI_Comm_size == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Comm_size` symbol.");
}
MPI_Comm_size(MPICommWorld(), &world_size);
return world_size;
}
void
MPIDriver::MPIBarrierWorld()
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Barrier)(void*){(int (*)(void*))dlsym(handle_, "MPI_Barrier")};
if (MPI_Barrier == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Barrier` symbol.");
}
MPI_Barrier(MPICommWorld());
}
int
MPIDriver::MPICommRankWorld()
{
if (is_enabled_ == false) {
return -1;
}
int rank{0};
int (*MPI_Comm_rank)(
void*, int*){(int (*)(void*, int*))dlsym(handle_, "MPI_Comm_rank")};
if (MPI_Comm_rank == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Comm_rank` symbol.");
}
MPI_Comm_rank(MPICommWorld(), &rank);
return rank;
}
void
MPIDriver::MPIBcastIntWorld(void* buffer, int count, int root)
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Bcast)(void*, int, void*, int, void*){
(int (*)(void*, int, void*, int, void*))dlsym(handle_, "MPI_Bcast")};
if (MPI_Bcast == nullptr) {
throw std::runtime_error("Unable to obtain address of `MPI_Bcast` symbol.");
}
MPI_Bcast(buffer, count, MPIInt(), root, MPICommWorld());
}
void
MPIDriver::MPIFinalize()
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Finalize)(){(int (*)())dlsym(handle_, "MPI_Finalize")};
if (MPI_Finalize == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Finalize` symbol.");
}
MPI_Finalize();
}
bool
MPIDriver::MPIInitialized()
{
if (is_enabled_ == false) {
return false;
}
int (*MPI_Initialized)(int*){
(int (*)(int*))dlsym(handle_, "MPI_Initialized")};
if (MPI_Initialized == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Initialized` symbol.");
}
int initialized{0};
MPI_Initialized(&initialized);
return initialized != 0;
}
void*
MPIDriver::MPICommWorld()
{
if (is_enabled_ == false) {
return nullptr;
}
void* MPI_COMM_WORLD{dlsym(handle_, "ompi_mpi_comm_world")};
if (MPI_COMM_WORLD == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `ompi_mpi_comm_world` symbol.");
}
return MPI_COMM_WORLD;
}
void*
MPIDriver::MPIInt()
{
if (is_enabled_ == false) {
return nullptr;
}
void* MPI_INT{dlsym(handle_, "ompi_mpi_int")};
if (MPI_INT == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `ompi_mpi_int` symbol.");
}
return MPI_INT;
}
void
MPIDriver::CheckMPIImpl()
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Get_library_version)(char*, int*){
(int (*)(char*, int*))dlsym(handle_, "MPI_Get_library_version")};
if (MPI_Get_library_version == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Get_library_version` symbol.");
}
std::string version;
version.resize(MPIVersionStringMaximumLength);
int resultlen{0};
MPI_Get_library_version(&version[0], &resultlen);
if (version.find("Open MPI") != 0) {
throw std::runtime_error(
"Perf Analyzer only supports Open MPI. Please uninstall your current "
"implementation of MPI and install Open MPI.");
}
}
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
namespace triton { namespace perfanalyzer {
class MPIDriver {
public:
// Initializes class. Saves handle to MPI library if MPI library is available.
MPIDriver(bool is_enabled = false);
// Returns true if the current process is an MPI process with world size
// greater than 1.
bool IsMPIRun();
// Attempts to call MPI_Init API.
void MPIInit(int* argc, char*** argv);
// Attempts to call MPI_Comm_size API with MPI_COMM_WORLD communicator.
int MPICommSizeWorld();
// Attempts to call MPI_Barrier API with MPI_COMM_WORLD communicator.
void MPIBarrierWorld();
// Attempts to call MPI_Comm_rank API with MPI_COMM_WORLD communicator.
int MPICommRankWorld();
// Attempts to call MPI_Bcast API with MPI_INT data type and MPI_COMM_WORLD
// communicator.
void MPIBcastIntWorld(void* buffer, int count, int root);
// Attempts to call MPI_Finalize API.
void MPIFinalize();
private:
// Attempts to call MPI_Initialized API.
bool MPIInitialized();
// Returns MPI_COMM_WORLD symbol address if MPI library is available,
// otherwise `nullptr`.
void* MPICommWorld();
// Returns MPI_INT symbol address if MPI library is available, otherwise
// `nullptr`.
void* MPIInt();
// Attempts to check that Open MPI is installed.
void CheckMPIImpl();
// Bool for whether user has opted to attempt to use MPI functionality.
bool is_enabled_{false};
// Loaded object for MPI library.
void* handle_{nullptr};
// Maximum string length for MPI version string.
const uint64_t MPIVersionStringMaximumLength{32768};
};
}} // namespace triton::perfanalyzer
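// A minimal, hypothetical usage sketch of the MPIDriver class above (not part
// of the original header): the driver is constructed enabled, MPI is
// initialized before querying rank/size, and finalized at the end.
// `RunWithMpi` and its wiring are illustrative only, not Perf Analyzer's
// actual entry point.
#include <iostream>

#include "mpi_utils.h"

void
RunWithMpi(int argc, char** argv)
{
  triton::perfanalyzer::MPIDriver mpi_driver{true /* is_enabled */};
  mpi_driver.MPIInit(&argc, &argv);
  if (mpi_driver.IsMPIRun()) {
    // Multi-rank run: each rank reports its position in MPI_COMM_WORLD and
    // then waits for the others before continuing.
    std::cout << "rank " << mpi_driver.MPICommRankWorld() << " of "
              << mpi_driver.MPICommSizeWorld() << std::endl;
    mpi_driver.MPIBarrierWorld();
  }
  mpi_driver.MPIFinalize();
}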
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "perf_analyzer.h"
#include "perf_analyzer_exception.h"
#include "report_writer.h"
#include "request_rate_manager.h"
namespace pa = triton::perfanalyzer;
namespace triton { namespace perfanalyzer {
volatile bool early_exit = false;
void
SignalHandler(int signum)
{
std::cout << "Interrupt signal (" << signum << ") received." << std::endl;
  // On the first invocation of SignalHandler the early_exit flag is set and
  // the analyzer waits for in-flight inferences to complete before exiting.
  // On the second invocation, the program exits immediately.
if (!early_exit) {
std::cout << "Waiting for in-flight inferences to complete." << std::endl;
early_exit = true;
} else {
std::cout << "Exiting immediately..." << std::endl;
exit(0);
}
}
}} // namespace triton::perfanalyzer
PerfAnalyzer::PerfAnalyzer(pa::PAParamsPtr params) : params_(params)
{
CreateAnalyzerObjects();
}
void
PerfAnalyzer::Run()
{
PrerunReport();
Profile();
WriteReport();
GenerateProfileExport();
Finalize();
}
void
PerfAnalyzer::CreateAnalyzerObjects()
{
// trap SIGINT to allow threads to exit gracefully
signal(SIGINT, pa::SignalHandler);
std::shared_ptr<cb::ClientBackendFactory> factory;
FAIL_IF_ERR(
cb::ClientBackendFactory::Create(
params_->kind, params_->url, params_->protocol, params_->ssl_options,
params_->trace_options, params_->compression_algorithm,
params_->http_headers, params_->triton_server_path,
params_->model_repository_path, params_->extra_verbose,
params_->metrics_url, params_->input_tensor_format,
params_->output_tensor_format, &factory),
"failed to create client factory");
FAIL_IF_ERR(
factory->CreateClientBackend(&backend_),
"failed to create triton client backend");
parser_ = std::make_shared<pa::ModelParser>(params_->kind);
if (params_->kind == cb::BackendKind::TRITON ||
params_->kind == cb::BackendKind::TRITON_C_API) {
rapidjson::Document model_metadata;
FAIL_IF_ERR(
backend_->ModelMetadata(
&model_metadata, params_->model_name, params_->model_version),
"failed to get model metadata");
rapidjson::Document model_config;
FAIL_IF_ERR(
backend_->ModelConfig(
&model_config, params_->model_name, params_->model_version),
"failed to get model config");
FAIL_IF_ERR(
parser_->InitTriton(
model_metadata, model_config, params_->model_version,
params_->bls_composing_models, params_->input_shapes, backend_),
"failed to create model parser");
} else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) {
rapidjson::Document model_metadata;
FAIL_IF_ERR(
backend_->ModelMetadata(
&model_metadata, params_->model_name, params_->model_version),
"failed to get model metadata");
FAIL_IF_ERR(
parser_->InitTFServe(
model_metadata, params_->model_name, params_->model_version,
params_->model_signature_name, params_->batch_size,
params_->input_shapes, backend_),
"failed to create model parser");
} else if (params_->kind == cb::BackendKind::TORCHSERVE) {
FAIL_IF_ERR(
parser_->InitTorchServe(
params_->model_name, params_->model_version, params_->batch_size),
"failed to create model parser");
} else {
std::cerr << "unsupported client backend kind" << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
if ((parser_->MaxBatchSize() == 0) && params_->batch_size > 1) {
std::cerr << "can not specify batch size > 1 as the model does not support "
"batching"
<< std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
// Change the default value for the --async option for sequential models
if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) ||
(parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) {
if (!params_->async) {
params_->async = params_->forced_sync ? false : true;
}
// Validate the batch_size specification
if (params_->batch_size > 1) {
std::cerr << "can not specify batch size > 1 when using a sequence model"
<< std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
}
if (params_->streaming) {
if (params_->forced_sync) {
std::cerr << "can not use streaming with synchronous API" << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
params_->async = true;
}
std::unique_ptr<pa::LoadManager> manager;
if (params_->targeting_concurrency()) {
if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) ||
(parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) {
if (params_->concurrency_range.end == pa::NO_LIMIT && params_->async) {
std::cerr << "The 'end' concurrency can not be 0 for sequence "
"models when using asynchronous API."
<< std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
}
params_->max_concurrency = std::max(
params_->concurrency_range.start, params_->concurrency_range.end);
if (!params_->async) {
if (params_->concurrency_range.end == pa::NO_LIMIT) {
std::cerr
<< "WARNING: The maximum attainable concurrency will be limited by "
"max_threads specification."
<< std::endl;
params_->concurrency_range.end = params_->max_threads;
} else {
        // Since a thread can generate only one synchronous request at a
        // time, that many threads need to be created to maintain the
        // requested concurrency.
if (params_->max_threads_specified) {
std::cerr
<< "WARNING: Overriding max_threads specification to ensure "
"requested concurrency range."
<< std::endl;
}
params_->max_threads = std::max(
params_->concurrency_range.start, params_->concurrency_range.end);
}
}
if ((params_->sequence_id_range != 0) &&
(params_->sequence_id_range < params_->max_concurrency)) {
std::cerr << "sequence id range specified is smaller than the "
<< "maximum possible concurrency, sequence id collision may "
<< "occur." << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
FAIL_IF_ERR(
pa::ConcurrencyManager::Create(
params_->async, params_->streaming, params_->batch_size,
params_->max_threads, params_->max_concurrency,
params_->shared_memory_type, params_->output_shm_size, parser_,
factory, &manager),
"failed to create concurrency manager");
} else if (params_->using_request_rate_range) {
if ((params_->sequence_id_range != 0) &&
(params_->sequence_id_range < params_->num_of_sequences)) {
std::cerr
<< "sequence id range specified is smaller than the "
<< "maximum possible number of sequences, sequence id collision "
<< "may occur." << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
FAIL_IF_ERR(
pa::RequestRateManager::Create(
params_->async, params_->streaming, params_->measurement_window_ms,
params_->max_trials, params_->request_distribution,
params_->batch_size, params_->max_threads,
params_->num_of_sequences, params_->shared_memory_type,
params_->output_shm_size, params_->serial_sequences, parser_,
factory, &manager),
"failed to create request rate manager");
} else {
if ((params_->sequence_id_range != 0) &&
(params_->sequence_id_range < params_->num_of_sequences)) {
std::cerr
<< "sequence id range specified is smaller than the "
<< "maximum possible number of sequences, sequence id collision "
<< "may occur." << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
FAIL_IF_ERR(
pa::CustomLoadManager::Create(
params_->async, params_->streaming, params_->measurement_window_ms,
params_->max_trials, params_->request_intervals_file,
params_->batch_size, params_->max_threads,
params_->num_of_sequences, params_->shared_memory_type,
params_->output_shm_size, params_->serial_sequences, parser_,
factory, &manager),
"failed to create custom load manager");
}
manager->InitManager(
params_->string_length, params_->string_data, params_->zero_input,
params_->user_data, params_->start_sequence_id,
params_->sequence_id_range, params_->sequence_length,
params_->sequence_length_specified, params_->sequence_length_variation);
FAIL_IF_ERR(
pa::ProfileDataCollector::Create(&collector_),
"failed to create profile data collector");
FAIL_IF_ERR(
pa::ProfileDataExporter::Create(&exporter_),
"failed to create profile data exporter");
FAIL_IF_ERR(
pa::InferenceProfiler::Create(
params_->verbose, params_->stability_threshold,
params_->measurement_window_ms, params_->max_trials,
params_->percentile, params_->latency_threshold_ms, params_->protocol,
parser_, std::move(backend_), std::move(manager), &profiler_,
params_->measurement_request_count, params_->measurement_mode,
params_->mpi_driver, params_->metrics_interval_ms,
params_->should_collect_metrics, params_->overhead_pct_threshold,
collector_, !params_->profile_export_file.empty()),
"failed to create profiler");
}
void
PerfAnalyzer::PrerunReport()
{
std::cout << "*** Measurement Settings ***" << std::endl;
if (params_->kind == cb::BackendKind::TRITON || params_->using_batch_size) {
std::cout << " Batch size: " << params_->batch_size << std::endl;
}
if (params_->kind == cb::BackendKind::TRITON_C_API) {
std::cout << " Service Kind: Triton C-API" << std::endl;
} else if (params_->kind == cb::BackendKind::TRITON) {
std::cout << " Service Kind: Triton" << std::endl;
} else if (params_->kind == cb::BackendKind::TORCHSERVE) {
std::cout << " Service Kind: TorchServe" << std::endl;
} else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) {
std::cout << " Service Kind: TensorFlow Serving" << std::endl;
}
if (params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) {
std::cout << " Using \"count_windows\" mode for stabilization"
<< std::endl;
} else {
std::cout << " Using \"time_windows\" mode for stabilization" << std::endl;
}
if (params_->measurement_mode == pa::MeasurementMode::TIME_WINDOWS) {
std::cout << " Measurement window: " << params_->measurement_window_ms
<< " msec" << std::endl;
} else if (params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) {
std::cout << " Minimum number of samples in each window: "
<< params_->measurement_request_count << std::endl;
}
if (params_->concurrency_range.end != 1) {
std::cout << " Latency limit: " << params_->latency_threshold_ms << " msec"
<< std::endl;
if (params_->concurrency_range.end != pa::NO_LIMIT) {
std::cout << " Concurrency limit: "
<< std::max(
params_->concurrency_range.start,
params_->concurrency_range.end)
<< " concurrent requests" << std::endl;
}
}
if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] != 1.0) {
std::cout << " Latency limit: " << params_->latency_threshold_ms << " msec"
<< std::endl;
if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] !=
static_cast<double>(pa::NO_LIMIT)) {
std::cout << " Request Rate limit: "
<< std::max(
params_->request_rate_range[pa::SEARCH_RANGE::kSTART],
                       params_->request_rate_range[pa::SEARCH_RANGE::kEND])
                << " requests per second" << std::endl;
}
}
if (params_->using_request_rate_range) {
if (params_->request_distribution == pa::Distribution::POISSON) {
std::cout << " Using poisson distribution on request generation"
<< std::endl;
} else {
std::cout << " Using uniform distribution on request generation"
<< std::endl;
}
}
if (params_->search_mode == pa::SearchMode::BINARY) {
std::cout << " Using Binary Search algorithm" << std::endl;
}
if (params_->async) {
std::cout << " Using asynchronous calls for inference" << std::endl;
} else {
std::cout << " Using synchronous calls for inference" << std::endl;
}
if (parser_->IsDecoupled()) {
std::cout << " Detected decoupled model, using the first response for "
"measuring latency"
<< std::endl;
}
if (params_->percentile == -1) {
std::cout << " Stabilizing using average latency" << std::endl;
} else {
std::cout << " Stabilizing using p" << params_->percentile << " latency"
<< std::endl;
}
std::cout << std::endl;
}
void
PerfAnalyzer::Profile()
{
params_->mpi_driver->MPIBarrierWorld();
cb::Error err;
if (params_->targeting_concurrency()) {
err = profiler_->Profile<size_t>(
params_->concurrency_range.start, params_->concurrency_range.end,
params_->concurrency_range.step, params_->search_mode, perf_statuses_);
} else {
err = profiler_->Profile<double>(
params_->request_rate_range[pa::SEARCH_RANGE::kSTART],
params_->request_rate_range[pa::SEARCH_RANGE::kEND],
params_->request_rate_range[pa::SEARCH_RANGE::kSTEP],
params_->search_mode, perf_statuses_);
}
params_->mpi_driver->MPIBarrierWorld();
if (!err.IsOk()) {
std::cerr << err;
    // In the case of early_exit, do not throw so that execution continues and
    // the summary is still reported.
if (!pa::early_exit) {
throw pa::PerfAnalyzerException(err.Err());
}
}
}
void
PerfAnalyzer::WriteReport()
{
if (!perf_statuses_.size()) {
return;
}
  // More could be printed when verbose is enabled, but it would be too much information.
std::cout << "Inferences/Second vs. Client ";
if (params_->percentile == -1) {
std::cout << "Average Batch Latency" << std::endl;
} else {
std::cout << "p" << params_->percentile << " Batch Latency" << std::endl;
}
for (pa::PerfStatus& status : perf_statuses_) {
if (params_->targeting_concurrency()) {
std::cout << "Concurrency: " << status.concurrency << ", ";
} else {
std::cout << "Request Rate: " << status.request_rate << ", ";
}
std::cout << "throughput: " << status.client_stats.infer_per_sec
<< " infer/sec, latency "
<< (status.stabilizing_latency_ns / 1000) << " usec" << std::endl;
}
bool should_output_metrics{
params_->should_collect_metrics && params_->verbose_csv};
std::unique_ptr<pa::ReportWriter> writer;
FAIL_IF_ERR(
pa::ReportWriter::Create(
params_->filename, params_->targeting_concurrency(), perf_statuses_,
params_->verbose_csv, profiler_->IncludeServerStats(),
params_->percentile, parser_, &writer, should_output_metrics),
"failed to create report writer");
writer->GenerateReport();
}
void
PerfAnalyzer::GenerateProfileExport()
{
if (!params_->profile_export_file.empty()) {
exporter_->Export(
collector_->GetData(), collector_->GetVersion(),
params_->profile_export_file);
}
}
void
PerfAnalyzer::Finalize()
{
params_->mpi_driver->MPIFinalize();
}
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <getopt.h>
#include <signal.h>
#include <algorithm>
#include "command_line_parser.h"
#include "concurrency_manager.h"
#include "custom_load_manager.h"
#include "inference_profiler.h"
#include "model_parser.h"
#include "mpi_utils.h"
#include "perf_utils.h"
#include "profile_data_collector.h"
#include "profile_data_exporter.h"
// Perf Analyzer provides various metrics to measure the performance of
// the inference server. It can either be used to measure throughput,
// latency, and time distribution under a fixed setting (i.e. fixed batch size
// and a fixed number of concurrent requests), or be used to generate
// throughput-latency data points under a dynamic setting (i.e. collecting
// throughput-latency data under different load levels).
//
// The following data is collected and used as part of the metrics:
// - Throughput (infer/sec):
//   The number of inferences processed per second as seen by the analyzer.
//   The number of inferences is the number of requests multiplied by their
//   batch size, and the total time is the time elapsed from when the analyzer
//   starts sending requests to when it has received all responses.
// - Latency (usec):
//   The average elapsed time between when a request is sent and
//   when the response for the request is received. If the 'percentile' flag
//   is specified, the selected percentile value is reported instead of the
//   average value.
//
// Perf Analyzer determines the stability of throughput and latency by observing
// measurements in different trials. If the latency and throughput are within
// the stability percentage (see --stability-percentage option), Perf Analyzer
// reports the average of the throughput and latency numbers observed in the
// last three trials. All the measurements gathered during the last three trials
// are aggregated to generate a single report. The total number of requests is
// the sum of all the requests in the individual measurement windows.
//
// There are broadly three ways to load the server for data collection using
// perf_analyzer:
// - Maintaining Target Concurrency:
//   In this setting, the analyzer maintains a target number of concurrent
//   requests sent to the server (see --concurrency-range option) while
//   taking measurements.
//   The number of requests is the total number of requests sent within
//   the time interval for measurement (see --measurement-interval option) and
//   the latency is the average latency across all requests.
//
//   Besides throughput and latency, which are measured on the client side,
//   the following data measured by the server is also reported
//   in this setting:
//   - Concurrent request: the number of concurrent requests as specified
//     in the --concurrency-range option. Note that to run perf analyzer at
//     a single concurrency, the user must specify --concurrency-range
//     <'start'>, omitting the 'end' and 'step' values.
//   - Batch size: the batch size of each request as specified in the -b option
//   - Inference count: batch size * number of inference requests
//   - Cumulative time: the total time between request received and
//     response sent for the requests sent by perf analyzer.
//   - Average cumulative time: cumulative time / number of inference requests
//   - Compute time: the total time it takes to run inference, including time
//     copying input tensors to GPU memory, time executing the model,
//     and time copying output tensors from GPU memory, for the requests
//     sent by perf analyzer.
//   - Average compute time: compute time / number of inference requests
//   - Queue time: the total time spent waiting for an available model
//     instance for the requests sent by perf analyzer.
//   - Average queue time: queue time / number of inference requests
//   If all fields of --concurrency-range are specified, the analyzer
//   performs the following procedure:
//     1. Follows the procedure in fixed concurrent request mode using
//        k concurrent requests (k starts at 'start').
//     2. Gathers the data reported from step 1.
//     3. Increases k by 'step' and repeats steps 1 and 2 until the latency
//        from the current iteration exceeds the latency threshold (see
//        --latency-threshold option) or the concurrency level reaches 'end'.
//        Note that by setting --latency-threshold or 'end' to 0, the effect
//        of either limit can be removed. However, both cannot be 0
//        simultaneously.
//   At each iteration, the data mentioned in fixed concurrent request mode
//   is reported. In addition, after the procedure above, a collection
//   of "throughput, latency, concurrent request count" tuples is
//   reported in increasing load level order.
//
// - Maintaining Target Request Rate:
//   This mode is enabled only when the --request-rate-range option is
//   specified. Unlike above, here the analyzer tries to maintain a target
//   rate of requests issued to the server while taking measurements. The rest
//   of the analyzer's behaviour is identical to the above. It is important to
//   note that even though over a sufficiently large interval the rate of
//   requests tends to the target request rate, the actual request rate for a
//   small time interval depends upon the selected request distribution
//   (--request-distribution). For the 'constant' request distribution, the
//   time interval between successive requests is kept constant, hence the
//   request rate is constant over time. The 'poisson' request distribution,
//   however, varies the time interval between successive requests such that
//   there are periods of bursts and lulls in request generation.
//   Additionally, the 'poisson' distribution mimics real-world traffic and
//   can be used to obtain measurements under a realistic load.
//   With each request rate, the analyzer also reports the 'Delayed Request
//   Count', which gives an idea of how many requests missed their schedule as
//   specified by the distribution. Users can use --max-threads to increase
//   the number of threads, which might help in dispatching requests as per
//   the schedule. Also note that a very large number of threads might be
//   counter-productive, with most of the time being spent on
//   context-switching between threads.
//
// - Following a User-Provided Request Delivery Schedule:
//   This mode is enabled only when the --request-intervals option is
//   specified. In this case, the analyzer tries to dispatch requests to the
//   server with the time intervals between successive requests specified in a
//   user-provided file. This file should contain one time interval in
//   microseconds per line. The analyzer loops over the values to produce a
//   consistent load for measurements. Once the readings have stabilized, the
//   final statistics are reported. The statistics include the
//   'Delayed Request Count' for the requests that missed their schedule. As
//   described before, users can tune --max-threads to help the analyzer keep
//   up with the schedule. This mode helps the user analyze the performance of
//   the server under custom settings that may be of interest.
//
// By default, perf_analyzer will maintain target concurrency while measuring
// the performance.
//
// Options:
// -b: batch size for each request sent.
// --concurrency-range: The range of concurrency levels perf_analyzer will use.
// A concurrency level indicates the number of concurrent requests in queue.
// --request-rate-range: The range of request rates perf_analyzer will use to
// load the server.
// --request-intervals: File containing time intervals (in microseconds) to use
// between successive requests.
// --latency-threshold: latency threshold in msec.
// --measurement-interval: time interval for each measurement window in msec.
// --async: Enables asynchronous inference calls.
// --binary-search: Enables binary search within the specified range.
// --request-distribution: Allows the user to specify the distribution used to
//     select the time intervals between request dispatches.
//
// For details of the options not listed here, please refer to the usage output.
//
class PerfAnalyzer {
public:
PerfAnalyzer(pa::PAParamsPtr params);
virtual ~PerfAnalyzer(){};
// Main runner function for Perf Analyzer.
void Run();
private:
pa::PAParamsPtr params_;
std::unique_ptr<pa::InferenceProfiler> profiler_;
std::unique_ptr<cb::ClientBackend> backend_;
std::shared_ptr<pa::ModelParser> parser_;
std::vector<pa::PerfStatus> perf_statuses_;
std::shared_ptr<pa::ProfileDataCollector> collector_;
std::shared_ptr<pa::ProfileDataExporter> exporter_;
  //
  // Helper methods
  //
void CreateAnalyzerObjects();
void PrerunReport();
void Profile();
void WriteReport();
void GenerateProfileExport();
void Finalize();
};
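// A minimal, hypothetical sketch (not part of the original header) of driving
// the class above: given a populated pa::PAParamsPtr (normally produced by the
// command-line parser declared in command_line_parser.h), the whole
// measurement pipeline is a construction followed by Run(). `RunAnalyzer` is
// an illustrative name; how the params object is filled in is out of scope.
#include <iostream>

#include "perf_analyzer.h"
#include "perf_analyzer_exception.h"

int
RunAnalyzer(pa::PAParamsPtr params)
{
  try {
    PerfAnalyzer analyzer(params);
    // Run() executes PrerunReport -> Profile -> WriteReport ->
    // GenerateProfileExport -> Finalize in order.
    analyzer.Run();
  }
  catch (const pa::PerfAnalyzerException& e) {
    std::cerr << e.what() << std::endl;
    return e.GetError();
  }
  return 0;
}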
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#include <exception>
#include <string>
namespace triton { namespace perfanalyzer {
// Perf Exception error class
//
class PerfAnalyzerException : public std::exception {
public:
PerfAnalyzerException(uint32_t error) : error_(error) {}
PerfAnalyzerException(const std::string& message, uint32_t error)
: message_(message), error_(error)
{
}
virtual const char* what() const throw() { return message_.c_str(); }
inline int GetError() const { return error_; }
private:
const std::string message_{""};
uint32_t error_;
};
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file exists to hold a macro-expanded main function for the unit test
// runner executable.
//
// The old contents of main.cc are needed for the unit test runner to compile,
// but since two main functions cannot be compiled in the same executable, the
// contents of the old main.cc were moved to a new file/class, which are now
// included in the compilation of the unit test runner executable.
//
// The new contents of main.cc just include the new file/class mentioned above
// and run the primary function from there in a simplified main function, which
// runs Perf Analyzer.
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest.h"
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "perf_utils.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include "client_backend/client_backend.h"
#include "doctest.h"
namespace triton { namespace perfanalyzer {
cb::ProtocolType
ParseProtocol(const std::string& str)
{
std::string protocol(str);
std::transform(protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
if (protocol == "http") {
return cb::ProtocolType::HTTP;
} else if (protocol == "grpc") {
return cb::ProtocolType::GRPC;
}
return cb::ProtocolType::UNKNOWN;
}
cb::Error
ConvertDTypeFromTFS(const std::string& tf_dtype, std::string* datatype)
{
if (tf_dtype == "DT_HALF") {
*datatype = "FP16";
} else if (tf_dtype == "DT_BFLOAT16") {
*datatype = "BF16";
} else if (tf_dtype == "DT_FLOAT") {
*datatype = "FP32";
} else if (tf_dtype == "DT_DOUBLE") {
*datatype = "FP64";
} else if (tf_dtype == "DT_INT32") {
*datatype = "INT32";
} else if (tf_dtype == "DT_INT16") {
*datatype = "INT16";
} else if (tf_dtype == "DT_UINT16") {
*datatype = "UINT16";
} else if (tf_dtype == "DT_INT8") {
*datatype = "INT8";
} else if (tf_dtype == "DT_UINT8") {
*datatype = "UINT8";
} else if (tf_dtype == "DT_STRING") {
*datatype = "BYTES";
} else if (tf_dtype == "DT_INT64") {
*datatype = "INT64";
} else if (tf_dtype == "DT_BOOL") {
*datatype = "BOOL";
} else if (tf_dtype == "DT_UINT32") {
*datatype = "UINT32";
} else if (tf_dtype == "DT_UINT64") {
*datatype = "UINT64";
} else {
return cb::Error(
"unsupported datatype encountered " + tf_dtype, pa::GENERIC_ERROR);
}
return cb::Error::Success;
}
bool
IsDirectory(const std::string& path)
{
struct stat s;
if (stat(path.c_str(), &s) == 0 && (s.st_mode & S_IFDIR)) {
return true;
} else {
return false;
}
}
bool
IsFile(const std::string& complete_path)
{
struct stat s;
if (stat(complete_path.c_str(), &s) == 0 && (s.st_mode & S_IFREG)) {
return true;
} else {
return false;
}
}
int64_t
ByteSize(const std::vector<int64_t>& shape, const std::string& datatype)
{
int one_element_size;
if ((datatype.compare("BOOL") == 0) || (datatype.compare("INT8") == 0) ||
(datatype.compare("UINT8") == 0)) {
one_element_size = 1;
} else if (
(datatype.compare("INT16") == 0) || (datatype.compare("UINT16") == 0) ||
(datatype.compare("FP16") == 0) || (datatype.compare("BF16") == 0)) {
one_element_size = 2;
} else if (
(datatype.compare("INT32") == 0) || (datatype.compare("UINT32") == 0) ||
(datatype.compare("FP32") == 0)) {
one_element_size = 4;
} else if (
(datatype.compare("INT64") == 0) || (datatype.compare("UINT64") == 0) ||
(datatype.compare("FP64") == 0)) {
one_element_size = 8;
} else {
return -1;
}
int64_t count = ElementCount(shape);
if (count < 0) {
return count;
}
return (one_element_size * count);
}
int64_t
ElementCount(const std::vector<int64_t>& shape)
{
int64_t count = 1;
bool is_dynamic = false;
for (const auto dim : shape) {
if (dim == -1) {
is_dynamic = true;
} else {
count *= dim;
}
}
if (is_dynamic) {
count = -1;
}
return count;
}
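// A small, hypothetical usage sketch (not part of the original file) of the
// two helpers above: a fully specified FP32 shape yields element count * 4
// bytes, while any -1 (dynamic) dimension makes both helpers return -1.
// Headers already pulled in by this translation unit are assumed.
static void
ByteSizeExample()
{
  std::vector<int64_t> fixed{8, 3, 224, 224};
  int64_t elements = ElementCount(fixed);       // 8 * 3 * 224 * 224 = 1204224
  int64_t bytes = ByteSize(fixed, "FP32");      // 1204224 * 4 = 4816896
  std::vector<int64_t> dynamic{-1, 3, 224, 224};
  int64_t unknown = ByteSize(dynamic, "FP32");  // -1 (dynamic dimension)
  std::cout << elements << " elements, " << bytes << " bytes, dynamic: "
            << unknown << std::endl;
}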
void
SerializeStringTensor(
std::vector<std::string> string_tensor, std::vector<char>* serialized_data)
{
std::string serialized = "";
for (auto s : string_tensor) {
uint32_t len = s.size();
serialized.append(reinterpret_cast<const char*>(&len), sizeof(uint32_t));
serialized.append(s);
}
std::copy(
serialized.begin(), serialized.end(),
std::back_inserter(*serialized_data));
}
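// A small, hypothetical sketch (not part of the original file) of the
// length-prepended layout produced by SerializeStringTensor above: each
// element becomes a 4-byte uint32_t length in host byte order followed by the
// raw characters, with no terminator.
static void
StringSerializationExample()
{
  std::vector<char> buf;
  SerializeStringTensor({"ab", "cde"}, &buf);
  // On a little-endian host the layout is:
  //   02 00 00 00 'a' 'b' 03 00 00 00 'c' 'd' 'e'
  // for a total of 4 + 2 + 4 + 3 = 13 bytes.
  std::cout << "serialized " << buf.size() << " bytes" << std::endl;
}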
cb::Error
SerializeExplicitTensor(
const rapidjson::Value& tensor, const std::string& dt,
std::vector<char>* decoded_data)
{
if (dt.compare("BYTES") == 0) {
std::string serialized = "";
for (const auto& value : tensor.GetArray()) {
if (!value.IsString()) {
return cb::Error(
"unable to find string data in json", pa::GENERIC_ERROR);
}
std::string element(value.GetString());
uint32_t len = element.size();
serialized.append(reinterpret_cast<const char*>(&len), sizeof(uint32_t));
serialized.append(element);
}
std::copy(
serialized.begin(), serialized.end(),
std::back_inserter(*decoded_data));
} else {
for (const auto& value : tensor.GetArray()) {
if (dt.compare("BOOL") == 0) {
if (!value.IsBool()) {
return cb::Error(
"unable to find bool data in json", pa::GENERIC_ERROR);
}
bool element(value.GetBool());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(bool));
} else if (dt.compare("UINT8") == 0) {
if (!value.IsUint()) {
return cb::Error(
"unable to find uint8_t data in json", pa::GENERIC_ERROR);
}
uint8_t element(static_cast<uint8_t>(value.GetUint()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint8_t));
} else if (dt.compare("INT8") == 0) {
if (!value.IsInt()) {
return cb::Error(
"unable to find int8_t data in json", pa::GENERIC_ERROR);
}
int8_t element(static_cast<int8_t>(value.GetInt()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int8_t));
} else if (dt.compare("UINT16") == 0) {
if (!value.IsUint()) {
return cb::Error(
"unable to find uint16_t data in json", pa::GENERIC_ERROR);
}
uint16_t element(static_cast<uint16_t>(value.GetUint()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint16_t));
} else if (dt.compare("INT16") == 0) {
if (!value.IsInt()) {
return cb::Error(
"unable to find int16_t data in json", pa::GENERIC_ERROR);
}
int16_t element(static_cast<int16_t>(value.GetInt()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int16_t));
} else if (dt.compare("FP16") == 0) {
return cb::Error(
"Can not use explicit tensor description for fp16 datatype",
pa::GENERIC_ERROR);
} else if (dt.compare("BF16") == 0) {
return cb::Error(
"Can not use explicit tensor description for bf16 datatype",
pa::GENERIC_ERROR);
} else if (dt.compare("UINT32") == 0) {
if (!value.IsUint()) {
return cb::Error(
"unable to find uint32_t data in json", pa::GENERIC_ERROR);
}
uint32_t element(value.GetUint());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint32_t));
} else if (dt.compare("INT32") == 0) {
if (!value.IsInt()) {
return cb::Error(
"unable to find int32_t data in json", pa::GENERIC_ERROR);
}
int32_t element(value.GetInt());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int32_t));
} else if (dt.compare("FP32") == 0) {
if (!value.IsDouble()) {
return cb::Error(
"unable to find float data in json", pa::GENERIC_ERROR);
}
float element(value.GetFloat());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(float));
} else if (dt.compare("UINT64") == 0) {
if (!value.IsUint64()) {
return cb::Error(
"unable to find uint64_t data in json", pa::GENERIC_ERROR);
}
uint64_t element(value.GetUint64());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint64_t));
} else if (dt.compare("INT64") == 0) {
if (!value.IsInt64()) {
return cb::Error(
"unable to find int64_t data in json", pa::GENERIC_ERROR);
}
int64_t element(value.GetInt64());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int64_t));
} else if (dt.compare("FP64") == 0) {
if (!value.IsDouble()) {
return cb::Error(
"unable to find fp64 data in json", pa::GENERIC_ERROR);
}
double element(value.GetDouble());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(double));
}
}
}
return cb::Error::Success;
}
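// A small, hypothetical sketch (not part of the original file) of feeding
// SerializeExplicitTensor above with a JSON array parsed by rapidjson: three
// INT32 values become 12 raw bytes (host byte order) in `bytes`.
static void
ExplicitTensorExample()
{
  rapidjson::Document doc;
  doc.Parse("[1, 2, 3]");
  std::vector<char> bytes;
  cb::Error err = SerializeExplicitTensor(doc, "INT32", &bytes);
  if (err.IsOk()) {
    std::cout << "serialized " << bytes.size() << " bytes" << std::endl;  // 12
  }
}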
std::string
GetRandomString(const int string_length)
{
std::mt19937_64 gen{std::random_device()()};
std::uniform_int_distribution<size_t> dist{0, character_set.length() - 1};
std::string random_string;
std::generate_n(std::back_inserter(random_string), string_length, [&] {
return character_set[dist(gen)];
});
return random_string;
}
std::string
ShapeVecToString(const std::vector<int64_t> shape_vec, bool skip_first)
{
bool first = true;
std::string str("[");
for (const auto& value : shape_vec) {
if (skip_first) {
skip_first = false;
continue;
}
if (!first) {
str += ",";
}
str += std::to_string(value);
first = false;
}
str += "]";
return str;
}
std::string
TensorToRegionName(std::string name)
{
// Remove slashes from the name, if any.
name.erase(
std::remove_if(
name.begin(), name.end(),
[](const char& c) { return ((c == '/') || (c == '\\')); }),
name.end());
return name;
}
template <>
std::function<std::chrono::nanoseconds(std::mt19937&)>
ScheduleDistribution<Distribution::POISSON>(const double request_rate)
{
std::exponential_distribution<> dist =
std::exponential_distribution<>(request_rate);
return [dist](std::mt19937& gen) mutable {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(dist(gen)));
};
}
template <>
std::function<std::chrono::nanoseconds(std::mt19937&)>
ScheduleDistribution<Distribution::CONSTANT>(const double request_rate)
{
std::chrono::nanoseconds period =
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(1.0 / request_rate));
return [period](std::mt19937& /*gen*/) { return period; };
}
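// A small, hypothetical sketch (not part of the original file) of using the
// two specializations above: at a target rate of 100 requests/second the
// CONSTANT schedule always yields a 10 ms gap, while the POISSON schedule
// draws exponentially distributed gaps whose mean is 10 ms.
static void
ScheduleDistributionExample()
{
  std::mt19937 gen{12345};
  auto constant_gap = ScheduleDistribution<Distribution::CONSTANT>(100.0);
  auto poisson_gap = ScheduleDistribution<Distribution::POISSON>(100.0);
  std::chrono::nanoseconds fixed = constant_gap(gen);   // always 10'000'000 ns
  std::chrono::nanoseconds random = poisson_gap(gen);   // varies per draw
  std::cout << fixed.count() << " ns, " << random.count() << " ns" << std::endl;
}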
cb::TensorFormat
ParseTensorFormat(const std::string& content_type_str)
{
std::string content_type_str_lowercase{content_type_str};
std::transform(
content_type_str.cbegin(), content_type_str.cend(),
content_type_str_lowercase.begin(),
[](unsigned char c) { return std::tolower(c); });
if (content_type_str_lowercase == "binary") {
return cb::TensorFormat::BINARY;
} else if (content_type_str_lowercase == "json") {
return cb::TensorFormat::JSON;
} else {
return cb::TensorFormat::UNKNOWN;
}
}
}} // namespace triton::perfanalyzer
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <rapidjson/document.h>
#include <rapidjson/rapidjson.h>
#include <sys/stat.h>
#include <time.h>
#include <chrono>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include "client_backend/client_backend.h"
namespace pa = triton::perfanalyzer;
namespace cb = triton::perfanalyzer::clientbackend;
namespace triton { namespace perfanalyzer {
constexpr uint64_t NANOS_PER_SECOND = 1000000000;
constexpr uint64_t NANOS_PER_MILLIS = 1000000;
#define CHRONO_TO_NANOS(TS) \
(std::chrono::duration_cast<std::chrono::nanoseconds>(TS.time_since_epoch()) \
.count())
#define CHRONO_TO_MILLIS(TS) (CHRONO_TO_NANOS(TS) / pa::NANOS_PER_MILLIS)
//==============================================================================
// Will use the characters specified here to construct random strings
std::string const character_set =
"abcdefghijklmnaoqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890 .?!";
// A boolean flag to mark an interrupt and commencement of early exit
extern volatile bool early_exit;
enum Distribution { POISSON = 0, CONSTANT = 1, CUSTOM = 2 };
enum SearchMode { LINEAR = 0, BINARY = 1, NONE = 2 };
enum SharedMemoryType {
SYSTEM_SHARED_MEMORY = 0,
CUDA_SHARED_MEMORY = 1,
NO_SHARED_MEMORY = 2
};
constexpr uint64_t NO_LIMIT = 0;
// Templated range class that tracks the start, end, and step for a range.
//
template <typename T>
class Range {
public:
Range(T start, T end, T step) : start(start), end(end), step(step) {}
T start;
T end;
T step;
};
// Converts a datatype from TensorFlow to perf analyzer space.
// \param tf_dtype The data type string returned from the model metadata.
// \param datatype Returns the datatype in perf_analyzer space.
// \return error status. Returns Non-Ok if the datatype is not supported.
cb::Error ConvertDTypeFromTFS(
const std::string& tf_dtype, std::string* datatype);
// Parse the communication protocol type
cb::ProtocolType ParseProtocol(const std::string& str);
// To check whether the path points to a valid system directory
bool IsDirectory(const std::string& path);
// To check whether the path points to a valid system file
bool IsFile(const std::string& complete_path);
// Calculates the byte size of a tensor for the given shape and datatype.
int64_t ByteSize(
const std::vector<int64_t>& shape, const std::string& datatype);
// Get the number of elements in the tensor for the given shape.
int64_t ElementCount(const std::vector<int64_t>& shape);
// Serializes the string tensor to length-prepended bytes.
void SerializeStringTensor(
std::vector<std::string> string_tensor, std::vector<char>* serialized_data);
// Serializes an explicit tensor read from the data file to the
// raw bytes.
cb::Error SerializeExplicitTensor(
const rapidjson::Value& tensor, const std::string& dt,
std::vector<char>* decoded_data);
// Generates a random string of specified length using characters specified in
// character_set.
std::string GetRandomString(const int string_length);
// Returns the shape string containing the values provided in the vector
std::string ShapeVecToString(
const std::vector<int64_t> shape_vec, bool skip_first = false);
// Remove slashes from tensor name, if any
std::string TensorToRegionName(std::string name);
// Returns the request schedule distribution generator with the specified
// request rate.
template <Distribution distribution>
std::function<std::chrono::nanoseconds(std::mt19937&)> ScheduleDistribution(
const double request_rate);
// Parse the HTTP tensor format
cb::TensorFormat ParseTensorFormat(const std::string& tensor_format_str);
}} // namespace triton::perfanalyzer