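// ============================================================
// gRPC model control client: fetches the model repository
// index, loads and unloads the "simple" model while checking
// its readiness, and verifies that loading a bogus model name
// fails.
// ============================================================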
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
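// A minimal usage sketch of the macro above (IsServerLive is one of the
// client calls returning tc::Error; shown here for illustration only):
//   bool live;
//   FAIL_IF_ERR(client->IsServerLive(&live), "unable to get server liveness");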
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
std::string model_name = "simple";
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
inference::RepositoryIndexResponse repository_index;
FAIL_IF_ERR(
client->ModelRepositoryIndex(&repository_index, http_headers),
"Failed to get repository index");
  if (repository_index.models().size() != 8) {
    std::cerr << "error: expected 8 models in the repository index, got "
              << repository_index.models().size() << std::endl;
    exit(1);
  }
FAIL_IF_ERR(
client->LoadModel(model_name, http_headers), "Failed to load model");
bool model_ready;
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
  if (!model_ready) {
    std::cerr << "error: model " << model_name << " is not ready" << std::endl;
    exit(1);
  }
FAIL_IF_ERR(
client->UnloadModel(model_name, http_headers), "Failed to unload model");
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
  if (model_ready) {
    std::cerr << "error: model " << model_name
              << " is still ready after unloading" << std::endl;
    exit(1);
  }
  // Loading a model that does not exist must fail.
  tc::Error err = client->LoadModel("wrong_model_name", http_headers);
  if (err.IsOk()) {
    std::cerr << "error: wrong model name was successfully loaded"
              << std::endl;
    exit(1);
  }
return 0;
}
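// ============================================================
// gRPC streaming sequence client: sends three inference
// sequences (two integer sequence IDs, one string sequence ID)
// over a single bidirectional stream and validates the
// accumulated results.
// ============================================================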
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <string>
#include <vector>
#include "grpc_client.h"
namespace tc = triton::client;
using ResultList = std::vector<std::shared_ptr<tc::InferResult>>;
// Global mutex to synchronize the threads
std::mutex mutex_;
std::condition_variable cv_;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service and its gRPC port>"
<< std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-t <stream timeout in microseconds>" << std::endl;
std::cerr << "\t-o <offset for sequence ID>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -o, the client will use sequence ID <1 + 2 * offset> "
<< "and <2 + 2 * offset>. Default offset is 0." << std::endl;
exit(1);
}
void
StreamSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
tc::InferOptions& options, int32_t value, const int32_t index)
{
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{1, 1};
FAIL_IF_ERR(
tc::InferInput::Create(&input, "INPUT", shape, "INT32"),
"unable to create 'INPUT'");
std::shared_ptr<tc::InferInput> ivalue(input);
FAIL_IF_ERR(ivalue->Reset(), "unable to reset 'INPUT'");
FAIL_IF_ERR(
ivalue->AppendRaw(reinterpret_cast<uint8_t*>(&value), sizeof(int32_t)),
"unable to set data for 'INPUT'");
std::vector<tc::InferInput*> inputs = {ivalue.get()};
// Send inference request to the inference server.
FAIL_IF_ERR(client->AsyncStreamInfer(options, inputs), "unable to run model");
}
void
StreamSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value, const uint64_t sequence_id,
bool start_of_sequence, bool end_of_sequence, const int32_t index)
{
// Stream send for unsigned int sequence IDs
tc::InferOptions options(model_name);
options.sequence_id_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
options.request_id_ =
std::to_string(sequence_id) + "_" + std::to_string(index);
StreamSend(client, options, value, index);
}
void
StreamSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value,
const std::string& sequence_id, bool start_of_sequence,
bool end_of_sequence, const int32_t index)
{
// Stream send for string sequence IDs
tc::InferOptions options(model_name);
options.sequence_id_str_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
options.request_id_ = sequence_id + "_" + std::to_string(index);
StreamSend(client, options, value, index);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool dyna_sequence = false;
std::string url("localhost:8001");
tc::Headers http_headers;
int sequence_id_offset = 0;
uint32_t stream_timeout = 0;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vdu:H:t:o:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'd':
dyna_sequence = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 't':
stream_timeout = std::stoi(optarg);
break;
case 'o':
sequence_id_offset = std::stoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
tc::Error err;
// We use the custom "sequence" model which takes 1 input value. The
// output is the accumulated value of the inputs. See
// src/custom/sequence.
std::string int_model_name =
dyna_sequence ? "simple_dyna_sequence" : "simple_sequence";
std::string string_model_name =
dyna_sequence ? "simple_string_dyna_sequence" : "simple_sequence";
const uint64_t int_sequence_id0 = 1 + sequence_id_offset * 2;
const uint64_t int_sequence_id1 = 2 + sequence_id_offset * 2;
  // For string sequence IDs, the dyna backend requires that the
  // sequence ID be decodable into an integer; otherwise we use a test
  // string sequence ID and a model that doesn't require correlation ID
  // (corrid) control.
const std::string string_sequence_id0 =
dyna_sequence ? std::to_string(3 + sequence_id_offset * 2) : "SEQ-3";
std::cout << "sequence ID " << int_sequence_id0 << " : "
<< "sequence ID " << int_sequence_id1 << " : "
<< "sequence ID " << string_sequence_id0 << std::endl;
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
  // Now send the inference sequences...
//
std::vector<int32_t> values{11, 7, 5, 3, 2, 0, 1};
ResultList result_list;
FAIL_IF_ERR(
client->StartStream(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr(result);
std::lock_guard<std::mutex> lk(mutex_);
result_list.push_back(result_ptr);
}
cv_.notify_all();
},
false /*ship_stats*/, stream_timeout, http_headers),
"unable to establish a streaming connection to server");
// Send requests, first reset accumulator for the sequence.
int32_t index = 0;
StreamSend(
client, int_model_name, 0, int_sequence_id0, true /* start-of-sequence */,
false /* end-of-sequence */, index++);
StreamSend(
client, int_model_name, 100, int_sequence_id1,
true /* start-of-sequence */, false /* end-of-sequence */, index++);
StreamSend(
client, string_model_name, 20, string_sequence_id0,
true /* start-of-sequence */, false /* end-of-sequence */, index++);
// Now send a sequence of values...
for (int32_t v : values) {
StreamSend(
client, int_model_name, v, int_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */, index++);
StreamSend(
client, int_model_name, -v, int_sequence_id1,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */, index++);
StreamSend(
client, string_model_name, -v, string_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */, index++);
}
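  // At this point 3 * (values.size() + 1) requests are in flight: one
  // accumulator-reset request plus values.size() data requests for each
  // of the three sequences. The final value (1) carries the
  // end-of-sequence flag.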
if (stream_timeout == 0) {
    // Wait until all callbacks are invoked
    {
      std::unique_lock<std::mutex> lk(mutex_);
      cv_.wait(
          lk, [&]() { return result_list.size() > (3 * values.size() + 2); });
    }
} else {
auto timeout = std::chrono::microseconds(stream_timeout);
// Wait until all callbacks are invoked or the timeout expires
{
std::unique_lock<std::mutex> lk(mutex_);
if (!cv_.wait_for(lk, timeout, [&]() {
return (result_list.size() > (3 * values.size() + 2));
})) {
std::cerr << "Stream has been closed" << std::endl;
exit(1);
}
}
}
// Extract data from the result
std::vector<int32_t> int_result0_data;
std::vector<int32_t> int_result1_data;
std::vector<int32_t> string_result0_data;
for (const auto& this_result : result_list) {
auto err = this_result->RequestStatus();
if (!err.IsOk()) {
std::cerr << "The inference failed: " << err << std::endl;
exit(1);
}
// Get pointers to the result returned...
int32_t* output_data;
size_t output_byte_size;
FAIL_IF_ERR(
this_result->RawData(
"OUTPUT", (const uint8_t**)&output_data, &output_byte_size),
"unable to get result data for 'OUTPUT'");
if (output_byte_size != 4) {
std::cerr << "error: received incorrect byte size for 'OUTPUT': "
<< output_byte_size << std::endl;
exit(1);
}
std::string request_id;
FAIL_IF_ERR(
this_result->Id(&request_id), "unable to get request id for response");
try {
std::string this_sequence_id =
std::string(request_id, 0, request_id.find("_"));
if (std::stoi(this_sequence_id) == int_sequence_id0) {
int_result0_data.push_back(*output_data);
} else if (std::stoi(this_sequence_id) == int_sequence_id1) {
int_result1_data.push_back(*output_data);
} else if (this_sequence_id == string_sequence_id0) {
string_result0_data.push_back(*output_data);
} else {
std::cerr << "error: received incorrect sequence id in response: "
<< this_sequence_id << std::endl;
exit(1);
}
}
catch (std::invalid_argument& e) {
      // std::stoi throws std::invalid_argument for the non-numeric test
      // sequence ID "SEQ-3".
string_result0_data.push_back(*output_data);
}
}
for (size_t i = 0; i < int_result0_data.size(); i++) {
int32_t int_seq0_expected = (i == 0) ? 1 : values[i - 1];
int32_t int_seq1_expected = (i == 0) ? 101 : values[i - 1] * -1;
int32_t string_seq0_expected;
// For string sequence ID case we are testing two different backends
if ((i == 0) && dyna_sequence) {
string_seq0_expected = 20;
} else if ((i == 0) && !dyna_sequence) {
string_seq0_expected = 21;
} else if ((i != 0) && dyna_sequence) {
string_seq0_expected = values[i - 1] * -1 + string_result0_data[i - 1];
} else {
string_seq0_expected = values[i - 1] * -1;
}
// The dyna_sequence custom backend adds the sequence ID to
// the last request in a sequence.
if (dyna_sequence && (i != 0) && (values[i - 1] == 1)) {
int_seq0_expected += int_sequence_id0;
int_seq1_expected += int_sequence_id1;
string_seq0_expected += std::stoi(string_sequence_id0);
}
std::cout << "[" << i << "] " << int_result0_data[i] << " : "
<< int_result1_data[i] << " : " << string_result0_data[i]
<< std::endl;
if ((int_seq0_expected != int_result0_data[i]) ||
(int_seq1_expected != int_result1_data[i]) ||
(string_seq0_expected != string_result0_data[i])) {
std::cout << "[ expected ] " << int_seq0_expected << " : "
<< int_seq1_expected << " : " << string_seq0_expected
<< std::endl;
return 1;
}
}
return 0;
}
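// ============================================================
// gRPC synchronous sequence client: sends the same three
// sequences as the streaming example, but each request uses a
// blocking Infer() call.
// ============================================================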
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <string>
#include <vector>
#include "grpc_client.h"
namespace tc = triton::client;
using ResultList = std::vector<std::shared_ptr<tc::InferResult>>;
// Global mutex to synchronize the threads
std::mutex mutex_;
std::condition_variable cv_;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service and its gRPC port>"
<< std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-o <offset for sequence ID>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -o, the client will use sequence ID <1 + 2 * offset> "
<< "and <2 + 2 * offset>. Default offset is 0." << std::endl;
exit(1);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
tc::InferOptions& options, int32_t value, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{1, 1};
FAIL_IF_ERR(
tc::InferInput::Create(&input, "INPUT", shape, "INT32"),
"unable to create 'INPUT'");
std::shared_ptr<tc::InferInput> ivalue(input);
FAIL_IF_ERR(ivalue->Reset(), "unable to reset 'INPUT'");
FAIL_IF_ERR(
ivalue->AppendRaw(reinterpret_cast<uint8_t*>(&value), sizeof(int32_t)),
"unable to set data for 'INPUT'");
std::vector<tc::InferInput*> inputs = {ivalue.get()};
tc::InferRequestedOutput* output;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output, "OUTPUT"),
"unable to get 'OUTPUT'");
std::shared_ptr<const tc::InferRequestedOutput> routput;
routput.reset(output);
std::vector<const tc::InferRequestedOutput*> outputs = {routput.get()};
tc::InferResult* result;
// Send inference request to the inference server.
FAIL_IF_ERR(
client->Infer(&result, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> this_result(result);
// Get pointers to the result returned...
int32_t* output_data;
size_t output_byte_size;
FAIL_IF_ERR(
this_result->RawData(
"OUTPUT", (const uint8_t**)&output_data, &output_byte_size),
"unable to get result data for 'OUTPUT'");
if (output_byte_size != 4) {
std::cerr << "error: received incorrect byte size for 'OUTPUT': "
<< output_byte_size << std::endl;
exit(1);
}
result_data.push_back(*output_data);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value, const uint64_t sequence_id,
bool start_of_sequence, bool end_of_sequence,
std::vector<int32_t>& result_data, tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value,
const std::string& sequence_id, bool start_of_sequence,
bool end_of_sequence, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_str_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool dyna_sequence = false;
std::string url("localhost:8001");
int sequence_id_offset = 0;
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vdu:H:o:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'd':
dyna_sequence = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 'o':
sequence_id_offset = std::stoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
tc::Error err;
// We use the custom "sequence" model which takes 1 input value. The
// output is the accumulated value of the inputs. See
// src/custom/sequence.
std::string int_model_name =
dyna_sequence ? "simple_dyna_sequence" : "simple_sequence";
std::string string_model_name =
dyna_sequence ? "simple_string_dyna_sequence" : "simple_sequence";
const uint64_t int_sequence_id0 = 1 + sequence_id_offset * 2;
const uint64_t int_sequence_id1 = 2 + sequence_id_offset * 2;
  // For string sequence IDs, the dyna backend requires that the
  // sequence ID be decodable into an integer; otherwise we use a test
  // string sequence ID and a model that doesn't require correlation ID
  // (corrid) control.
const std::string string_sequence_id0 =
dyna_sequence ? std::to_string(3 + sequence_id_offset * 2) : "SEQ-3";
std::cout << "sequence ID " << int_sequence_id0 << " : "
<< "sequence ID " << int_sequence_id1 << " : "
<< "sequence ID " << string_sequence_id0 << std::endl;
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
  // Now send the inference sequences...
//
std::vector<int32_t> values{11, 7, 5, 3, 2, 0, 1};
std::vector<int32_t> int_result0_data;
std::vector<int32_t> int_result1_data;
std::vector<int32_t> string_result0_data;
// Send requests, first reset accumulator for the sequence.
SyncSend(
client, int_model_name, 0, int_sequence_id0, true /* start-of-sequence */,
false /* end-of-sequence */, int_result0_data, http_headers);
SyncSend(
client, int_model_name, 100, int_sequence_id1,
true /* start-of-sequence */, false /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, 20, string_sequence_id0,
true /* start-of-sequence */, false /* end-of-sequence */,
string_result0_data, http_headers);
// Now send a sequence of values...
for (int32_t v : values) {
SyncSend(
client, int_model_name, v, int_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result0_data, http_headers);
SyncSend(
client, int_model_name, -v, int_sequence_id1,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, -v, string_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
string_result0_data, http_headers);
}
for (size_t i = 0; i < int_result0_data.size(); i++) {
int32_t int_seq0_expected = (i == 0) ? 1 : values[i - 1];
int32_t int_seq1_expected = (i == 0) ? 101 : values[i - 1] * -1;
int32_t string_seq0_expected;
// For string sequence ID case we are testing two different backends
if ((i == 0) && dyna_sequence) {
string_seq0_expected = 20;
} else if ((i == 0) && !dyna_sequence) {
string_seq0_expected = 21;
} else if ((i != 0) && dyna_sequence) {
string_seq0_expected = values[i - 1] * -1 + string_result0_data[i - 1];
} else {
string_seq0_expected = values[i - 1] * -1;
}
// The dyna_sequence custom backend adds the sequence ID to
// the last request in a sequence.
if (dyna_sequence && (i != 0) && (values[i - 1] == 1)) {
int_seq0_expected += int_sequence_id0;
int_seq1_expected += int_sequence_id1;
string_seq0_expected += std::stoi(string_sequence_id0);
}
std::cout << "[" << i << "] " << int_result0_data[i] << " : "
<< int_result1_data[i] << " : " << string_result0_data[i]
<< std::endl;
if ((int_seq0_expected != int_result0_data[i]) ||
(int_seq1_expected != int_result1_data[i]) ||
(string_seq0_expected != string_result0_data[i])) {
std::cout << "[ expected ] " << int_seq0_expected << " : "
<< int_seq1_expected << " : " << string_seq0_expected
<< std::endl;
return 1;
}
}
return 0;
}
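// ============================================================
// gRPC system shared memory client: places the "simple"
// model's input and output tensors in POSIX shared memory
// regions registered with the server.
// ============================================================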
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
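  // For example, with INPUT0 = {0, 1, ..., 15} and INPUT1 all ones (as
  // initialized below), the expected results are OUTPUT0 = {1, 2, ..., 16}
  // and OUTPUT1 = {-1, 0, ..., 14}.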
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
// integers and Input1 to all ones.
std::string shm_key = "/input_simple";
int shm_fd_ip, *input0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
      "unable to create input shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
      "unable to map input shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_ip),
      "unable to close input shared memory descriptor");
int* input1_shm = (int*)(input0_shm + 16);
for (size_t i = 0; i < 16; ++i) {
*(input0_shm + i) = i;
*(input1_shm + i) = 1;
}
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in Shared Memory
shm_key = "/output_simple";
int shm_fd_op;
int* output0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, output_byte_size * 2, &shm_fd_op),
      "unable to create output shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_op, 0, output_byte_size * 2, (void**)&output0_shm),
      "unable to map output shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_op),
      "unable to close output shared memory descriptor");
int* output1_shm = (int*)(output0_shm + 16);
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_shm[i] << " + " << input1_shm[i] << " = "
<< output0_shm[i] << std::endl;
std::cout << input0_shm[i] << " - " << input1_shm[i] << " = "
<< output1_shm[i] << std::endl;
if ((input0_shm[i] + input1_shm[i]) != output0_shm[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_shm[i] - input1_shm[i]) != output1_shm[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
  // Get the shared memory regions active/registered with Triton
inference::SystemSharedMemoryStatusResponse status;
FAIL_IF_ERR(
client->SystemSharedMemoryStatus(&status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << status.DebugString() << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Cleanup shared memory
  FAIL_IF_ERR(
      tc::UnmapSharedMemory(input0_shm, input_byte_size * 2),
      "unable to unmap input shared memory region");
  FAIL_IF_ERR(
      tc::UnlinkSharedMemoryRegion("/input_simple"),
      "unable to unlink input shared memory region");
  FAIL_IF_ERR(
      tc::UnmapSharedMemory(output0_shm, output_byte_size * 2),
      "unable to unmap output shared memory region");
  FAIL_IF_ERR(
      tc::UnlinkSharedMemoryRegion("/output_simple"),
      "unable to unlink output shared memory region");
std::cout << "PASS : System Shared Memory " << std::endl;
return 0;
}
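// ============================================================
// gRPC string (BYTES) client: runs the "simple_string" model,
// which sums and differences integers passed as string
// tensors.
// ============================================================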
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(result->Shape(name, &shape), "unable to get shape for " + name);
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for " << name << std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype), "unable to get datatype for " + name);
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "error: received incorrect datatype for " << name << ": "
<< datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 strings
// each and returns 2 output tensors of 16 strings each. The input
// strings must represent integers. One output tensor is the
// element-wise sum of the inputs and one output is the element-wise
// difference.
std::string model_name = "simple_string";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones. The input tensors
// are the string representation of these values.
std::vector<std::string> input0_data(16);
std::vector<std::string> input1_data(16);
std::vector<int32_t> expected_sum(16);
std::vector<int32_t> expected_diff(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = std::to_string(i);
input1_data[i] = std::to_string(1);
expected_sum[i] = i + 1;
expected_diff[i] = i - 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendFromString(input0_data),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendFromString(input1_data),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get OUTPUT0");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get OUTPUT1");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get the result data
std::vector<std::string> result0_data;
std::vector<std::string> result1_data;
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT0", &result0_data),
"unable to get data for OUTPUT0");
if (result0_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT0: "
<< result0_data.size() << std::endl;
exit(1);
}
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT1", &result1_data),
"unable to get data for OUTPUT1");
if (result1_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT1: "
<< result1_data.size() << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input0_data[i] << " = "
<< result0_data[i] << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< result1_data[i] << std::endl;
if (expected_sum[i] != std::stoi(result0_data[i])) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if (expected_diff[i] != std::stoi(result1_data[i])) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
std::cout << "PASS : String Infer" << std::endl;
return 0;
}
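// ============================================================
// HTTP asynchronous client: issues AsyncInfer requests against
// the "simple" model and synchronizes with the callbacks via a
// mutex and condition variable.
// ============================================================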
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <string>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
ValidateResult(
const std::shared_ptr<tc::InferResult> result,
std::vector<int32_t>& input0_data, std::vector<int32_t>& input1_data)
{
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", result);
ValidateShapeAndDatatype("OUTPUT1", result);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
  if (output1_byte_size != 64) {
    std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
              << output1_byte_size << std::endl;
    exit(1);
  }
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << result->DebugString() << std::endl;
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
uint32_t client_timeout = 0;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:t:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
// Send inference request to the inference server.
std::mutex mtx;
std::condition_variable cv;
size_t repeat_cnt = 2;
size_t done_cnt = 0;
for (size_t i = 0; i < repeat_cnt; i++) {
FAIL_IF_ERR(
client->AsyncInfer(
[&, i](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
std::lock_guard<std::mutex> lk(mtx);
std::cout << "Callback no." << i << " is called" << std::endl;
done_cnt++;
if (result_ptr->RequestStatus().IsOk()) {
ValidateResult(result_ptr, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_ptr->RequestStatus() << std::endl;
exit(1);
}
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
}
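  // Note: AsyncInfer transfers ownership of the raw InferResult* to the
  // callback, hence the shared_ptr wrapper above that releases each
  // result when the callback finishes with it.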
// Wait until all callbacks are invoked
{
std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk, [&]() { return done_cnt >= repeat_cnt; });
}
if (done_cnt == repeat_cnt) {
std::cout << "All done" << std::endl;
} else {
std::cerr << "Done cnt: " << done_cnt
<< " does not match repeat cnt: " << repeat_cnt << std::endl;
exit(1);
}
// Send another AsyncInfer whose callback defers the completed request
// to another thread (main thread) to handle
bool callback_invoked = false;
std::shared_ptr<tc::InferResult> result_placeholder;
FAIL_IF_ERR(
client->AsyncInfer(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
// Defer the response retrieval to main thread
std::lock_guard<std::mutex> lk(mtx);
callback_invoked = true;
result_placeholder = std::move(result_ptr);
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
// Ensure callback is completed
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() { return callback_invoked; });
}
// Get deferred response
std::cout << "Getting results from deferred response" << std::endl;
if (result_placeholder->RequestStatus().IsOk()) {
ValidateResult(result_placeholder, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_placeholder->RequestStatus() << std::endl;
exit(1);
}
tc::InferStat infer_stat;
client->ClientInferStat(&infer_stat);
std::cout << "completed_request_count " << infer_stat.completed_request_count
<< std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns " << infer_stat.cumulative_send_time_ns
<< std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
std::cout << "PASS : Async Infer" << std::endl;
return 0;
}
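// ============================================================
// HTTP CUDA shared memory client: registers GPU memory with
// the server through CUDA IPC handles so tensor data stays on
// the device.
// ============================================================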
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cuda_runtime_api.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
#define FAIL_IF_CUDA_ERR(FUNC) \
{ \
const cudaError_t result = FUNC; \
if (result != cudaSuccess) { \
std::cerr << "CUDA exception (line " << __LINE__ \
<< "): " << cudaGetErrorName(result) << " (" \
<< cudaGetErrorString(result) << ")" << std::endl; \
exit(1); \
} \
}
void
CreateCUDAIPCHandle(
cudaIpcMemHandle_t* cuda_handle, void* input_d_ptr, int device_id = 0)
{
// Set the GPU device to the desired GPU
FAIL_IF_CUDA_ERR(cudaSetDevice(device_id));
// Create IPC handle for data on the gpu
FAIL_IF_CUDA_ERR(cudaIpcGetMemHandle(cuda_handle, input_d_ptr));
}
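// cudaIpcGetMemHandle produces a handle that another process (here the
// Triton server) can pass to cudaIpcOpenMemHandle to map the same device
// allocation, so tensor data never leaves the GPU.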
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in CUDA Shared Memory. Initialize Input0 to
// unique integers and Input1 to all ones.
int input_data[32];
for (size_t i = 0; i < 16; ++i) {
input_data[i] = i;
input_data[16 + i] = 1;
}
  // Copy INPUT0 and INPUT1 data into the GPU memory backing the shared
  // memory region.
  int* input_d_ptr;
  FAIL_IF_CUDA_ERR(cudaMalloc((void**)&input_d_ptr, input_byte_size * 2));
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      (void*)input_d_ptr, (void*)input_data, input_byte_size * 2,
      cudaMemcpyHostToDevice));
cudaIpcMemHandle_t input_cuda_handle;
CreateCUDAIPCHandle(&input_cuda_handle, (void*)input_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"input_data", input_cuda_handle, 0 /* device_id */,
input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in CUDA Shared Memory
  int *output0_d_ptr, *output1_d_ptr;
  FAIL_IF_CUDA_ERR(cudaMalloc((void**)&output0_d_ptr, output_byte_size * 2));
  output1_d_ptr = output0_d_ptr + 16;
cudaIpcMemHandle_t output_cuda_handle;
CreateCUDAIPCHandle(&output_cuda_handle, (void*)output0_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"output_data", output_cuda_handle, 0 /* device_id */,
output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
  // Copy the output data back to the CPU.
  int output0_data[16], output1_data[16];
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      output0_data, output0_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      output1_data, output1_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
for (size_t i = 0; i < 16; ++i) {
std::cout << input_data[i] << " + " << input_data[16 + i] << " = "
<< output0_data[i] << std::endl;
std::cout << input_data[i] << " + " << input_data[16 + i] << " = "
<< output1_data[i] << std::endl;
if ((input_data[i] + input_data[16 + i]) != output0_data[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input_data[i] - input_data[16 + i]) != output1_data[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get shared memory regions active/registered within triton
std::string shm_status;
FAIL_IF_ERR(
client->CudaSharedMemoryStatus(&shm_status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << shm_status << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Free GPU memory
FAIL_IF_CUDA_ERR(cudaFree(input_d_ptr));
FAIL_IF_CUDA_ERR(cudaFree(output0_d_ptr));
std::cout << "PASS : Cuda Shared Memory " << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "json_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
bool live;
FAIL_IF_ERR(
client->IsServerLive(&live, http_headers),
"unable to get server liveness");
if (!live) {
std::cerr << "error: server is not live" << std::endl;
exit(1);
}
  bool ready;
  FAIL_IF_ERR(
      client->IsServerReady(&ready, http_headers),
      "unable to get server readiness");
  if (!ready) {
    std::cerr << "error: server is not ready" << std::endl;
    exit(1);
  }
bool model_ready;
FAIL_IF_ERR(
client->IsModelReady(
&model_ready, model_name, model_version, http_headers),
"unable to get model readiness");
if (!model_ready) {
std::cerr << "error: model " << model_name << " is not live" << std::endl;
exit(1);
}
{
std::string server_metadata;
FAIL_IF_ERR(
client->ServerMetadata(&server_metadata, http_headers),
"unable to get server metadata");
rapidjson::Document server_metadata_json;
FAIL_IF_ERR(
tc::ParseJson(&server_metadata_json, server_metadata),
"failed to parse server metadata");
if ((std::string(server_metadata_json["name"].GetString()))
.compare("triton") != 0) {
std::cerr << "error: unexpected server metadata: " << server_metadata
<< std::endl;
exit(1);
}
}
{
std::string model_metadata;
FAIL_IF_ERR(
client->ModelMetadata(
&model_metadata, model_name, model_version, http_headers),
"unable to get model metadata");
rapidjson::Document model_metadata_json;
FAIL_IF_ERR(
tc::ParseJson(&model_metadata_json, model_metadata),
"failed to parse model metadata");
if ((std::string(model_metadata_json["name"].GetString()))
.compare(model_name) != 0) {
std::cerr << "error: unexpected model metadata: " << model_metadata
<< std::endl;
exit(1);
}
}
{
std::string model_config;
FAIL_IF_ERR(
client->ModelConfig(
&model_config, model_name, model_version, http_headers),
"unable to get model config");
rapidjson::Document model_config_json;
FAIL_IF_ERR(
tc::ParseJson(&model_config_json, model_config),
"failed to parse model config");
if ((std::string(model_config_json["name"].GetString()))
.compare(model_name) != 0) {
std::cerr << "error: unexpected model config: " << model_config
<< std::endl;
exit(1);
}
}
{
std::string model_metadata;
tc::Error err = client->ModelMetadata(
&model_metadata, "wrong_model_name", model_version, http_headers);
if (err.IsOk()) {
std::cerr << "error: expected an error but got: " << err << std::endl;
exit(1);
}
}
std::cout << "SUCCESS" << std::endl;
return 0;
}
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <getopt.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << "\t-i <none|gzip|deflate>" << std::endl;
std::cerr << "\t-o <none|gzip|deflate>" << std::endl;
std::cerr << std::endl;
std::cerr << "\t--verify-peer" << std::endl;
std::cerr << "\t--verify-host" << std::endl;
std::cerr << "\t--ca-certs" << std::endl;
std::cerr << "\t--cert-file" << std::endl;
std::cerr << "\t--key-file" << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl
<< "For -i, it sets the compression algorithm used for sending request "
"body."
<< "For -o, it sets the compression algorithm used for receiving "
"response body."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
uint32_t client_timeout = 0;
auto request_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::NONE;
auto response_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::NONE;
long verify_peer = 1;
long verify_host = 2;
std::string cacerts;
std::string certfile;
std::string keyfile;
// {name, has_arg, *flag, val}
static struct option long_options[] = {
{"verify-peer", 1, 0, 0}, {"verify-host", 1, 0, 1}, {"ca-certs", 1, 0, 2},
{"cert-file", 1, 0, 3}, {"key-file", 1, 0, 4}, {0, 0, 0, 0}};
// Parse commandline...
int opt;
while ((opt = getopt_long(argc, argv, "vu:t:H:i:o:", long_options, NULL)) !=
-1) {
switch (opt) {
case 0:
verify_peer = std::atoi(optarg);
break;
case 1:
verify_host = std::atoi(optarg);
break;
case 2:
cacerts = optarg;
break;
case 3:
certfile = optarg;
break;
case 4:
keyfile = optarg;
break;
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 'i': {
std::string arg = optarg;
if (arg == "gzip") {
request_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::GZIP;
} else if (arg == "deflate") {
request_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::DEFLATE;
}
break;
}
case 'o': {
std::string arg = optarg;
if (arg == "gzip") {
response_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::GZIP;
} else if (arg == "deflate") {
response_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::DEFLATE;
}
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
tc::HttpSslOptions ssl_options;
ssl_options.verify_peer = verify_peer;
ssl_options.verify_host = verify_host;
ssl_options.ca_info = cacerts;
ssl_options.cert = certfile;
ssl_options.key = keyfile;
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose, ssl_options),
"unable to create http client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
// Empty output vector will request data for all the output tensors from
// the server.
std::vector<const tc::InferRequestedOutput*> outputs = {};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(
&results, options, inputs, outputs, http_headers, tc::Parameters(),
request_compression_algorithm, response_compression_algorithm),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
  if (output1_byte_size != 64) {
    std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
              << output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
tc::InferStat infer_stat;
  FAIL_IF_ERR(
      client->ClientInferStat(&infer_stat),
      "unable to get client inference statistics");
std::cout << "======Client Statistics======" << std::endl;
std::cout << "completed_request_count " << infer_stat.completed_request_count
<< std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns " << infer_stat.cumulative_send_time_ns
<< std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
std::string model_stat;
FAIL_IF_ERR(
client->ModelInferenceStatistics(&model_stat, model_name),
"unable to get model statistics");
std::cout << "======Model Statistics======" << std::endl;
std::cout << model_stat << std::endl;
std::cout << "PASS : Infer" << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "json_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
std::string model_name = "simple";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
{
std::string repository_index;
FAIL_IF_ERR(
client->ModelRepositoryIndex(&repository_index, http_headers),
"Failed to get repository index");
rapidjson::Document repository_index_json;
FAIL_IF_ERR(
tc::ParseJson(&repository_index_json, repository_index),
"failed to parse model config");
if (repository_index_json.Size() != 7) {
std::cerr << "expected number of models 7, got "
<< repository_index_json.Size() << std::endl;
exit(1);
}
}
FAIL_IF_ERR(
client->LoadModel(model_name, http_headers), "Failed to load model");
bool model_ready;
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
if (!model_ready) {
std::cerr << "error: model " << model_name << " is not live" << std::endl;
exit(1);
}
FAIL_IF_ERR(
client->UnloadModel(model_name, http_headers), "Failed to unload model");
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
if (model_ready) {
std::cerr << "error: model " << model_name << " is live after unloading"
<< std::endl;
exit(1);
}
tc::Error err = client->LoadModel("wrong_model_name", http_headers);
  if (err.IsOk()) {
    std::cerr << "error: wrong model name was successfully loaded" << std::endl;
    exit(1);
  }
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <string>
#include <vector>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service and its http port>"
<< std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-o <offset for sequence ID>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -o, the client will use sequence ID <1 + 2 * offset> "
<< "and <2 + 2 * offset>. Default offset is 0." << std::endl;
exit(1);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerHttpClient>& client,
tc::InferOptions& options, int32_t value, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{1, 1};
FAIL_IF_ERR(
tc::InferInput::Create(&input, "INPUT", shape, "INT32"),
"unable to create 'INPUT'");
std::shared_ptr<tc::InferInput> ivalue(input);
FAIL_IF_ERR(ivalue->Reset(), "unable to reset 'INPUT'");
FAIL_IF_ERR(
ivalue->AppendRaw(reinterpret_cast<uint8_t*>(&value), sizeof(int32_t)),
"unable to set data for 'INPUT'");
std::vector<tc::InferInput*> inputs = {ivalue.get()};
tc::InferRequestedOutput* output;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output, "OUTPUT"),
"unable to get 'OUTPUT'");
std::shared_ptr<const tc::InferRequestedOutput> routput;
routput.reset(output);
std::vector<const tc::InferRequestedOutput*> outputs = {routput.get()};
tc::InferResult* result;
// Send inference request to the inference server.
FAIL_IF_ERR(
client->Infer(&result, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> this_result(result);
// Get pointers to the result returned...
int32_t* output_data;
size_t output_byte_size;
FAIL_IF_ERR(
this_result->RawData(
"OUTPUT", (const uint8_t**)&output_data, &output_byte_size),
"unable to get result data for 'OUTPUT'");
if (output_byte_size != 4) {
std::cerr << "error: received incorrect byte size for 'OUTPUT': "
<< output_byte_size << std::endl;
exit(1);
}
result_data.push_back(*output_data);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerHttpClient>& client,
const std::string& model_name, int32_t value, const uint64_t sequence_id,
bool start_of_sequence, bool end_of_sequence,
std::vector<int32_t>& result_data, tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerHttpClient>& client,
const std::string& model_name, int32_t value,
const std::string& sequence_id, bool start_of_sequence,
bool end_of_sequence, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_str_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
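// A minimal sketch of how the overloads above drive one sequence,
// assuming an accumulating sequence model (values are illustrative):
//
//   SyncSend(client, model, 0,  seq_id, true,  false, data, hdrs); // start
//   SyncSend(client, model, 11, seq_id, false, false, data, hdrs); // middle
//   SyncSend(client, model, 1,  seq_id, false, true,  data, hdrs); // end
//
// sequence_start_ opens the sequence slot on the server, sequence_end_
// releases it, and every request in between must carry the same ID.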
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool dyna_sequence = false;
std::string url("localhost:8000");
int sequence_id_offset = 0;
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vdu:H:o:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 'd':
dyna_sequence = true;
break;
case 'u':
url = optarg;
break;
case 'o':
sequence_id_offset = std::stoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
tc::Error err;
// We use the custom "sequence" model which takes 1 input value. The
// output is the accumulated value of the inputs. See
// src/custom/sequence.
std::string int_model_name =
dyna_sequence ? "simple_dyna_sequence" : "simple_sequence";
std::string string_model_name =
dyna_sequence ? "simple_string_dyna_sequence" : "simple_sequence";
const uint64_t int_sequence_id0 = 1 + sequence_id_offset * 2;
const uint64_t int_sequence_id1 = 2 + sequence_id_offset * 2;
// For string sequence IDs, the dyna backend requires that the
// sequence id be decodable into an integer, otherwise we'll use
// a test string sequence id and a model that doesn't require corrid
// control.
const std::string string_sequence_id0 =
dyna_sequence ? std::to_string(3 + sequence_id_offset * 2) : "SEQ-3";
std::cout << "sequence ID " << int_sequence_id0 << " : "
<< "sequence ID " << int_sequence_id1 << " : "
<< "sequence ID " << string_sequence_id0 << std::endl;
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
  // Now send the inference sequences...
//
std::vector<int32_t> values{11, 7, 5, 3, 2, 0, 1};
std::vector<int32_t> int_result0_data;
std::vector<int32_t> int_result1_data;
std::vector<int32_t> string_result0_data;
// Send requests, first reset accumulator for the sequence.
SyncSend(
client, int_model_name, 0, int_sequence_id0, true /* start-of-sequence */,
false /* end-of-sequence */, int_result0_data, http_headers);
SyncSend(
client, int_model_name, 100, int_sequence_id1,
true /* start-of-sequence */, false /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, 20, string_sequence_id0,
true /* start-of-sequence */, false /* end-of-sequence */,
string_result0_data, http_headers);
// Now send a sequence of values...
for (int32_t v : values) {
SyncSend(
client, int_model_name, v, int_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result0_data, http_headers);
SyncSend(
client, int_model_name, -v, int_sequence_id1,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, -v, string_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
string_result0_data, http_headers);
}
for (size_t i = 0; i < int_result0_data.size(); i++) {
int32_t int_seq0_expected = (i == 0) ? 1 : values[i - 1];
int32_t int_seq1_expected = (i == 0) ? 101 : values[i - 1] * -1;
int32_t string_seq0_expected;
    // For the string sequence ID case we are testing two different backends.
if ((i == 0) && dyna_sequence) {
string_seq0_expected = 20;
} else if ((i == 0) && !dyna_sequence) {
string_seq0_expected = 21;
} else if ((i != 0) && dyna_sequence) {
string_seq0_expected = values[i - 1] * -1 + string_result0_data[i - 1];
} else {
string_seq0_expected = values[i - 1] * -1;
}
// The dyna_sequence custom backend adds the sequence ID to
// the last request in a sequence.
if (dyna_sequence && (i != 0) && (values[i - 1] == 1)) {
int_seq0_expected += int_sequence_id0;
int_seq1_expected += int_sequence_id1;
string_seq0_expected += std::stoi(string_sequence_id0);
}
std::cout << "[" << i << "] " << int_result0_data[i] << " : "
<< int_result1_data[i] << " : " << string_result0_data[i]
<< std::endl;
if ((int_seq0_expected != int_result0_data[i]) ||
(int_seq1_expected != int_result1_data[i]) ||
(string_seq0_expected != string_result0_data[i])) {
std::cout << "[ expected ] " << int_seq0_expected << " : "
<< int_seq1_expected << " : " << string_seq0_expected
<< std::endl;
return 1;
}
}
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
// integers and Input1 to all ones.
std::string shm_key = "/input_simple";
int shm_fd_ip, *input0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
      "unable to create input shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
      "unable to map input shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_ip),
      "unable to close input shared memory descriptor");
int* input1_shm = (int*)(input0_shm + 16);
for (size_t i = 0; i < 16; ++i) {
*(input0_shm + i) = i;
*(input1_shm + i) = 1;
}
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in Shared Memory
shm_key = "/output_simple";
int shm_fd_op;
int* output0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, output_byte_size * 2, &shm_fd_op),
      "unable to create output shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_op, 0, output_byte_size * 2, (void**)&output0_shm),
      "unable to map output shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_op),
      "unable to close output shared memory descriptor");
int* output1_shm = (int*)(output0_shm + 16);
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_shm[i] << " + " << input1_shm[i] << " = "
<< output0_shm[i] << std::endl;
std::cout << input0_shm[i] << " - " << input1_shm[i] << " = "
<< output1_shm[i] << std::endl;
if ((input0_shm[i] + input1_shm[i]) != output0_shm[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_shm[i] - input1_shm[i]) != output1_shm[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get shared memory regions active/registered within triton
std::string shm_status;
FAIL_IF_ERR(
client->SystemSharedMemoryStatus(&shm_status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << shm_status << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Cleanup shared memory
FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
FAIL_IF_ERR(tc::UnmapSharedMemory(output0_shm, output_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/output_simple"), "");
std::cout << "PASS : System Shared Memory " << std::endl;
return 0;
}
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <getopt.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(result->Shape(name, &shape), "unable to get shape for " + name);
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for " << name << std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype), "unable to get datatype for " + name);
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "error: received incorrect datatype for " << name << ": "
<< datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << "\t--json-input-data" << std::endl;
std::cerr << "\t--json-output-data" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
bool json_input_data{false};
bool json_output_data{false};
// {name, has_arg, *flag, val}
static struct option long_options[] = {
{"json-input-data", 0, 0, 0},
{"json-output-data", 0, 0, 1},
{0, 0, 0, 0}};
// Parse commandline...
int opt;
while ((opt = getopt_long(argc, argv, "vu:H:", long_options, NULL)) != -1) {
switch (opt) {
case 0:
json_input_data = true;
break;
case 1:
json_output_data = true;
break;
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 strings
// each and returns 2 output tensors of 16 strings each. The input
// strings must represent integers. One output tensor is the
// element-wise sum of the inputs and one output is the element-wise
// difference.
std::string model_name = "simple_string";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones. The input tensors
// are the string representation of these values.
std::vector<std::string> input0_data(16);
std::vector<std::string> input1_data(16);
std::vector<int32_t> expected_sum(16);
std::vector<int32_t> expected_diff(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = std::to_string(i);
input1_data[i] = std::to_string(1);
expected_sum[i] = i + 1;
expected_diff[i] = i - 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
input0_ptr->SetBinaryData(!json_input_data);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
input1_ptr->SetBinaryData(!json_input_data);
FAIL_IF_ERR(
input0_ptr->AppendFromString(input0_data),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendFromString(input1_data),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get OUTPUT0");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
output0_ptr->SetBinaryData(!json_output_data);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get OUTPUT1");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
output1_ptr->SetBinaryData(!json_output_data);
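  // With binary data enabled (the default here) the tensor contents travel
  // in the binary-data extension of the HTTP/REST protocol; passing
  // --json-input-data / --json-output-data instead carries the elements
  // inside the JSON request or response body.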
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get the result data
std::vector<std::string> result0_data;
std::vector<std::string> result1_data;
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT0", &result0_data),
"unable to get data for OUTPUT0");
if (result0_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT0: "
<< result0_data.size() << std::endl;
exit(1);
}
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT1", &result1_data),
"unable to get data for OUTPUT1");
if (result1_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT1: "
<< result1_data.size() << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input0_data[i] << " = "
<< result0_data[i] << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< result1_data[i] << std::endl;
if (expected_sum[i] != std::stoi(result0_data[i])) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if (expected_diff[i] != std::stoi(result1_data[i])) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
std::cout << "PASS : String Infer" << std::endl;
return 0;
}
#include <dirent.h>
#include <getopt.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <algorithm>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <queue>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core/version.hpp>
#if CV_MAJOR_VERSION == 2
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#elif CV_MAJOR_VERSION >= 3
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#endif
#if CV_MAJOR_VERSION == 4
#define GET_TRANSFORMATION_CODE(x) cv::COLOR_##x
#else
#define GET_TRANSFORMATION_CODE(x) CV_##x
#endif
using namespace cv;
namespace tc = triton::client;
namespace {
enum ProtocolType { HTTP = 0, GRPC = 1 };
struct ModelInfo {
std::string output_name_;
std::string input_name_;
std::string input_datatype_;
int input_c_;
int input_h_;
int input_w_;
std::string input_format_;
int type1_;
int type3_;
int max_batch_size_;
};
struct ResultOfDetection {
  cv::Rect boundingBox;
  float confidence;
  int classID;
  std::string className;
  bool exist;
  ResultOfDetection() : confidence(0.0f), classID(0), exist(true) {}
};
// Simplified non-maximum suppression: boxes are visited in index order
// (not sorted by score). Each kept box suppresses later boxes whose
// intersection-over-union with it exceeds nms_threshold by zeroing
// their scores.
std::vector<int> NMSBoxes(
    const std::vector<cv::Rect>& boxes, std::vector<float>& scores,
    float score_threshold, float nms_threshold)
{
  std::vector<int> indices;
  std::vector<float> areas(boxes.size());
  for (size_t i = 0; i < boxes.size(); i++) {
    areas[i] = boxes[i].width * boxes[i].height;
  }
  for (size_t i = 0; i < boxes.size(); i++) {
    if (scores[i] > score_threshold) {
      indices.push_back(static_cast<int>(i));
      for (size_t j = i + 1; j < boxes.size(); j++) {
        if (scores[j] > score_threshold) {
          cv::Rect intersection = boxes[i] & boxes[j];
          float overlap = static_cast<float>(intersection.area()) /
                          (areas[i] + areas[j] - intersection.area());
          if (overlap > nms_threshold) {
            scores[j] = 0.0f;  // suppress box j
          }
        }
      }
    }
  }
  return indices;
}
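// Illustrative call (boxes and scores are made up): two heavily
// overlapping boxes, the later one suppressed, so only index 0 is kept:
//   std::vector<cv::Rect> b{{0, 0, 10, 10}, {1, 1, 10, 10}};
//   std::vector<float> s{0.9f, 0.8f};
//   std::vector<int> keep = NMSBoxes(b, s, 0.5f, 0.4f);  // keep == {0}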
void Preprocess(
const std::string& filename, int img_type1, int img_type3, size_t img_channels,
const cv::Size& img_size, std::vector<uint8_t>* input_data)
{
cv::Mat img = cv::imread(filename, 1);
if (img.empty()) {
std::cerr << "error: unable to decode image " << filename << std::endl;
exit(1);
}
cv::Mat sample;
if ((img.channels() == 3) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGR2RGB));
} else {
std::cerr << "unexpected number of channels " << img.channels()
<< " in input image, model expects " << img_channels << "."
<< std::endl;
exit(1);
}
cv::Mat sample_resized;
cv::resize(sample, sample_resized, img_size);
cv::Mat sample_type;
  sample_resized.convertTo(
      sample_type, (img_channels == 3) ? img_type3 : img_type1);
  cv::Mat sample_final;
  sample_final =
      sample_type.mul(cv::Scalar(1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0));
size_t img_byte_size = sample_final.total() * sample_final.elemSize();
size_t pos = 0;
input_data->resize(img_byte_size);
  // Scatter the channels into planar (CHW) order directly into the
  // caller-provided buffer.
  std::vector<cv::Mat> input_bgr_channels;
  for (size_t i = 0; i < img_channels; ++i) {
    input_bgr_channels.emplace_back(
        img_size.height, img_size.width, img_type1, &((*input_data)[pos]));
    pos += input_bgr_channels.back().total() *
           input_bgr_channels.back().elemSize();
  }
cv::split(sample_final, input_bgr_channels);
if (pos != img_byte_size) {
std::cerr << "unexpected total size of channels " << pos << ", expecting "
<< img_byte_size << std::endl;
exit(1);
}
}
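// Preprocess() emits planar (CHW) data: all of channel 0, then channel 1,
// then channel 2, each value scaled by 1/255. For a hypothetical 2x2 RGB
// image the buffer would read:
//   [R00 R01 R10 R11 | G00 G01 G10 G11 | B00 B01 B10 B11]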
void Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const std::string& output_name, const bool batching)
{
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
  const uint8_t* result_data;
  size_t result_byte_size = 0;
  err = result->RawData(output_name, &result_data, &result_byte_size);
  if (!err.IsOk()) {
    std::cerr << "unable to get data for " << output_name << std::endl;
    exit(1);
  }
  std::vector<cv::Mat> outs;
  cv::Mat srcImage = cv::imread(filenames[0], 1);
  // cv::Mat dimensions are plain ints, so narrow the int64_t shape values.
  int out_shape[] = {
      static_cast<int>(shape[0]), static_cast<int>(shape[1]),
      static_cast<int>(shape[2])};
  cv::Mat out(3, out_shape, CV_32F);
  memcpy(out.data, result_data, result_byte_size);
  outs.push_back(out);
int numProposal = outs[0].size[1];
int numOut = outs[0].size[2];
outs[0] = outs[0].reshape(0, numProposal);
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
  // Scale factors from the assumed 640x640 network input back to the
  // original image size.
  float ratioh = (float)srcImage.rows / 640, ratiow = (float)srcImage.cols / 640;
int n = 0, rowInd = 0;
float* pdata = (float*)outs[0].data;
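  // Each proposal row is decoded below assuming a YOLOv5-style layout
  // (this is an assumption inferred from the code, not stated metadata):
  //   pdata[0..3] = cx, cy, w, h in network-input (640x640) coordinates
  //   pdata[4]    = objectness score
  //   pdata[5..]  = per-class scores (numOut - 5 classes)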
for (n = 0; n < numProposal; n++)
{
float boxScores = pdata[4];
if (boxScores > 0.5)
{
cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
cv::Point classIdPoint;
double maxClassScore;
cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
maxClassScore *= boxScores;
if (maxClassScore > 0.25)
{
const int classIdx = classIdPoint.x;
float cx = pdata[0] * ratiow;
float cy = pdata[1] * ratioh;
float w = pdata[2] * ratiow;
float h = pdata[3] * ratioh;
int left = int(cx - 0.5 * w);
int top = int(cy - 0.5 * h);
confidences.push_back((float)maxClassScore);
boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
classIds.push_back(classIdx);
}
}
rowInd++;
pdata += numOut;
}
std::vector<int> indices;
indices = NMSBoxes(boxes, confidences, 0.25, 0.5);
std::vector<ResultOfDetection> resultsOfDetection;
  for (size_t i = 0; i < indices.size(); ++i)
  {
    int idx = indices[i];
    int classID = classIds[idx];
    float confidence = confidences[idx];
    cv::Rect box = boxes[idx];
    ResultOfDetection result;
    result.boundingBox = box;
    result.confidence = confidence;
    result.classID = classID;
    resultsOfDetection.push_back(result);
  }
fprintf(stdout,"//////////////Detection Results//////////////\n");
for( size_t i = 0; i < resultsOfDetection.size(); ++i)
{
ResultOfDetection result = resultsOfDetection[i];
cv::rectangle(srcImage, result.boundingBox, cv::Scalar(0,255,255),2);
fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",result.boundingBox.x,
result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
}
cv::imwrite("result.jpg", srcImage);
}
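// ParseType maps a Triton datatype string (e.g. "FP32") to the matching
// 1-channel and 3-channel OpenCV types; returns false for unsupported types.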
bool ParseType(const std::string& dtype, int* type1, int* type3)
{
if (dtype.compare("UINT8") == 0) {
*type1 = CV_8UC1;
*type3 = CV_8UC3;
} else if (dtype.compare("INT8") == 0) {
*type1 = CV_8SC1;
*type3 = CV_8SC3;
} else if (dtype.compare("UINT16") == 0) {
*type1 = CV_16UC1;
*type3 = CV_16UC3;
} else if (dtype.compare("INT16") == 0) {
*type1 = CV_16SC1;
*type3 = CV_16SC3;
} else if (dtype.compare("INT32") == 0) {
*type1 = CV_32SC1;
*type3 = CV_32SC3;
} else if (dtype.compare("FP32") == 0) {
*type1 = CV_32FC1;
*type3 = CV_32FC3;
} else if (dtype.compare("FP64") == 0) {
*type1 = CV_64FC1;
*type3 = CV_64FC3;
} else {
return false;
}
return true;
}
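// ParseModelHttp validates the model metadata and configuration fetched over
// HTTP (exactly one input and one output, FP32 output, batch size within
// max_batch_size) and fills 'model_info'. The input shape is assumed to be
// NCHW with an explicit leading batch dimension, so C/H/W are read from
// dims 1-3.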
void ParseModelHttp(
const rapidjson::Document& model_metadata,
const rapidjson::Document& model_config, const size_t batch_size,
ModelInfo* model_info)
{
const auto& input_itr = model_metadata.FindMember("inputs");
size_t input_count = 0;
if (input_itr != model_metadata.MemberEnd()) {
input_count = input_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input, got " << input_count << std::endl;
exit(1);
}
const auto& output_itr = model_metadata.FindMember("outputs");
size_t output_count = 0;
if (output_itr != model_metadata.MemberEnd()) {
output_count = output_itr->value.Size();
}
if (output_count != 1) {
std::cerr << "expecting 1 output, got " << output_count << std::endl;
exit(1);
}
const auto& input_config_itr = model_config.FindMember("input");
input_count = 0;
if (input_config_itr != model_config.MemberEnd()) {
input_count = input_config_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input in model configuration, got " << input_count
<< std::endl;
exit(1);
}
const auto& input_metadata = *input_itr->value.Begin();
const auto& input_config = *input_config_itr->value.Begin();
const auto& output_metadata = *output_itr->value.Begin();
const auto& output_dtype_itr = output_metadata.FindMember("datatype");
if (output_dtype_itr == output_metadata.MemberEnd()) {
std::cerr << "output missing datatype in the metadata for model'"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
auto datatype = std::string(output_dtype_itr->value.GetString(),
output_dtype_itr->value.GetStringLength());
if (datatype.compare("FP32") != 0) {
std::cerr << "expecting output datatype to be FP32, model '"
<< model_metadata["name"].GetString() << "' output type is '"
<< datatype << "'" << std::endl;
exit(1);
}
int max_batch_size = 0;
const auto bs_itr = model_config.FindMember("max_batch_size");
if (bs_itr != model_config.MemberEnd()) {
max_batch_size = bs_itr->value.GetUint();
}
model_info->max_batch_size_ = max_batch_size;
if (max_batch_size == 0) {
if (batch_size != 1) {
std::cerr << "batching not supported for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
} else {
if (batch_size > (size_t)max_batch_size) {
std::cerr << "expecting batch size <= " << max_batch_size
<< " for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
const bool input_batch_dim = (max_batch_size == 0);
const size_t expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
const auto input_shape_itr = input_metadata.FindMember("shape");
  if (input_shape_itr != input_metadata.MemberEnd()) {
    if (input_shape_itr->value.Size() != expected_input_dims) {
      std::cerr << "expecting input to have " << expected_input_dims
                << " dimensions, model '" << model_metadata["name"].GetString()
                << "' input has " << input_shape_itr->value.Size() << std::endl;
      exit(1);
    }
  } else {
    std::cerr << "input missing shape in the metadata for model '"
              << model_metadata["name"].GetString() << "'" << std::endl;
    exit(1);
  }
model_info->input_format_ = std::string(input_config["format"].GetString(), input_config["format"].GetStringLength());
model_info->output_name_ = std::string(output_metadata["name"].GetString(), output_metadata["name"].GetStringLength());
model_info->input_name_ = std::string(input_metadata["name"].GetString(), input_metadata["name"].GetStringLength());
model_info->input_datatype_ = std::string(input_metadata["datatype"].GetString(), input_metadata["datatype"].GetStringLength());
model_info->input_c_ = input_shape_itr->value[1].GetInt();
model_info->input_h_ = input_shape_itr->value[2].GetInt();
model_info->input_w_ = input_shape_itr->value[3].GetInt();
if (!ParseType(model_info->input_datatype_, &(model_info->type1_), &(model_info->type3_))) {
std::cerr << "unexpected input datatype '" << model_info->input_datatype_
<< "' for model \"" << model_metadata["name"].GetString()
<< std::endl;
exit(1);
}
}
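// Only one of the two clients is ever constructed, so a union avoids carrying
// both. The placement-new in the constructor initializes the HTTP member, and
// the empty destructor deliberately leaves cleanup to process exit.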
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
} // namespace
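// The client flow: parse arguments, fetch and validate model metadata and
// configuration, preprocess the input image(s), send synchronous inference
// requests (one batch per request), and postprocess each result.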
int
main(int argc, char** argv)
{
bool verbose = false;
bool async = false;
int batch_size = 1;
if (argc < 3 || argc > 3)
{
fprintf(stdout, "Two args are required: ./a yolov7-tiny image_path\n");
return -1;
}
std::string model_name = argv[1];
std::string fileName = argv[2];
std::string preprocess_output_filename;
std::string model_version = "";
std::string url("localhost:8000");
ProtocolType protocol = ProtocolType::HTTP;
tc::Headers http_headers;
TritonClient triton_client;
tc::Error err;
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err << std::endl;
exit(1);
}
ModelInfo model_info;
std::string model_metadata;
err = triton_client.http_client_->ModelMetadata(&model_metadata, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model metadata: " << err << std::endl;
}
rapidjson::Document model_metadata_json;
err = tc::ParseJson(&model_metadata_json, model_metadata);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model metadata: " << err
<< std::endl;
}
std::string model_config;
err = triton_client.http_client_->ModelConfig(&model_config, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
ParseModelHttp( model_metadata_json, model_config_json, batch_size, &model_info);
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(fileName.c_str(), &name_stat) != 0) {
std::cerr << "Failed to find '" << fileName << "': " << strerror(errno) << std::endl;
exit(1);
}
  if (S_ISDIR(name_stat.st_mode)) {
const std::string dirname = fileName;
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(fileName);
}
std::sort(image_filenames.begin(), image_filenames.end());
std::vector<std::vector<uint8_t>> image_data;
for (const auto& fn : image_filenames) {
image_data.emplace_back();
Preprocess(fn, model_info.type1_, model_info.type3_, model_info.input_c_,
cv::Size(model_info.input_w_, model_info.input_h_), &(image_data.back()));
if ((image_data.size() == 1) && !preprocess_output_filename.empty()) {
std::ofstream output_file(preprocess_output_filename);
std::ostream_iterator<uint8_t> output_iterator(output_file);
std::copy(image_data[0].begin(), image_data[0].end(), output_iterator);
}
}
std::vector<int64_t> shape;
shape.push_back(batch_size);
shape.push_back(model_info.input_c_);
shape.push_back(model_info.input_h_);
shape.push_back(model_info.input_w_);
tc::InferInput* input;
err = tc::InferInput::Create(&input, model_info.input_name_, shape, model_info.input_datatype_);
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
err = tc::InferRequestedOutput::Create(&output, model_info.output_name_);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<std::unique_ptr<tc::InferResult>> results;
std::vector<std::vector<std::string>> result_filenames;
size_t image_idx = 0;
size_t done_cnt = 0;
size_t sent_count = 0;
bool last_request = false;
std::mutex mtx;
std::condition_variable cv;
auto callback_func = [&](tc::InferResult* result)
{
{
std::lock_guard<std::mutex> lk(mtx);
results.emplace_back(result);
done_cnt++;
}
cv.notify_all();
};
while (!last_request) {
err = input_ptr->Reset();
if (!err.IsOk()) {
std::cerr << "failed resetting input: " << err << std::endl;
exit(1);
}
std::vector<std::string> input_filenames;
for (int idx = 0; idx < batch_size; ++idx) {
input_filenames.push_back(image_filenames[image_idx]);
err = input_ptr->AppendRaw(image_data[image_idx]);
if (!err.IsOk()) {
std::cerr << "failed setting input: " << err << std::endl;
exit(1);
}
image_idx = (image_idx + 1) % image_data.size();
if (image_idx == 0) {
last_request = true;
}
}
result_filenames.emplace_back(std::move(input_filenames));
options.request_id_ = std::to_string(sent_count);
    double time1 = cv::getTickCount();
tc::InferResult* result;
if (protocol == ProtocolType::HTTP) {
err = triton_client.http_client_->Infer(
&result, options, inputs, outputs, http_headers);
} else {
err = triton_client.grpc_client_->Infer(
&result, options, inputs, outputs, http_headers);
}
if (!err.IsOk()) {
std::cerr << "failed sending synchronous infer request: " << err
<< std::endl;
exit(1);
}
results.emplace_back(result);
    double time2 = cv::getTickCount();
    double elapsedTime = (time2 - time1) * 1000 / cv::getTickFrequency();
    fprintf(stdout, "inference time: %f ms\n", elapsedTime);
sent_count++;
}
for (size_t idx = 0; idx < results.size(); idx++) {
std::cout << "Request " << idx << ", batch size " << batch_size << std::endl;
Postprocess(
std::move(results[idx]), result_filenames[idx], batch_size,
model_info.output_name_, model_info.max_batch_size_ != 0);
}
return 0;
}
# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required (VERSION 3.18)
find_package(Threads REQUIRED)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
#
# common
#
add_library(
client-common-library INTERFACE
)
target_include_directories(
client-common-library
INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)
#
# json_utils
#
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_EXAMPLES)
find_package(RapidJSON CONFIG REQUIRED)
add_library(
json-utils-library EXCLUDE_FROM_ALL OBJECT
json_utils.h json_utils.cc
)
target_include_directories(
json-utils-library
PUBLIC ${RapidJSON_INCLUDE_DIRS}
)
target_link_libraries(
json-utils-library
PRIVATE
client-common-library
)
endif()
#
# shm_utils
#
add_library(
shm-utils-library EXCLUDE_FROM_ALL OBJECT
shm_utils.h shm_utils.cc
)
target_link_libraries(
shm-utils-library
PRIVATE
client-common-library
)
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
#
# libgrpcclient.so and libgrpcclient_static.a
#
configure_file(libgrpcclient.ldscript libgrpcclient.ldscript COPYONLY)
# libgrpcclient object build
set(
REQUEST_SRCS
grpc_client.cc common.cc
)
set(
REQUEST_HDRS
grpc_client.h common.h ipc.h
)
add_library(
grpc-client-library EXCLUDE_FROM_ALL OBJECT
${REQUEST_SRCS} ${REQUEST_HDRS}
)
add_dependencies(
grpc-client-library
grpc-service-library proto-library
)
# libgrpcclient_static.a
add_library(
grpcclient_static STATIC
$<TARGET_OBJECTS:grpc-client-library>
$<TARGET_OBJECTS:grpc-service-library>
$<TARGET_OBJECTS:proto-library>
)
add_library(
TritonClient::grpcclient_static ALIAS grpcclient_static
)
target_include_directories(
grpcclient_static
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
)
target_link_libraries(
grpcclient_static
PRIVATE gRPC::grpc++
PRIVATE gRPC::grpc
PUBLIC protobuf::libprotobuf
PUBLIC Threads::Threads
)
# libgrpcclient.so
add_library(
grpcclient SHARED
$<TARGET_OBJECTS:grpc-service-library>
$<TARGET_OBJECTS:proto-library>
$<TARGET_OBJECTS:grpc-client-library>
)
add_library(
TritonClient::grpcclient ALIAS grpcclient
)
if (NOT WIN32 AND NOT TRITON_KEEP_TYPEINFO)
set_target_properties(
grpcclient
PROPERTIES
LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libgrpcclient.ldscript
LINK_FLAGS "-Wl,--version-script=libgrpcclient.ldscript"
)
endif() # NOT WIN32 AND NOT TRITON_KEEP_TYPEINFO
target_link_libraries(
grpcclient
PRIVATE gRPC::grpc++
PRIVATE gRPC::grpc
PUBLIC protobuf::libprotobuf
PUBLIC Threads::Threads
)
foreach(_client_target grpc-client-library grpcclient_static grpcclient)
target_compile_features(${_client_target} PRIVATE cxx_std_11)
target_compile_options(
${_client_target} PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
)
set_target_properties(
${_client_target}
PROPERTIES
POSITION_INDEPENDENT_CODE ON
)
target_include_directories(
${_client_target}
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<TARGET_PROPERTY:proto-library,INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:grpc-service-library,INCLUDE_DIRECTORIES>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
)
if(TRITON_ENABLE_GPU)
target_compile_definitions(
${_client_target}
PUBLIC TRITON_ENABLE_GPU=1
)
endif() # TRITON_ENABLE_GPU
if(TRITON_ENABLE_GPU)
target_link_libraries(
${_client_target}
PUBLIC CUDA::cudart
)
endif() # TRITON_ENABLE_GPU
endforeach()
if (TRITON_USE_THIRD_PARTY)
if (NOT WIN32)
install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/curl/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/grpc/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/protobuf/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/c-ares/lib/
DESTINATION ${CMAKE_INSTALL_LIBDIR}
FILES_MATCHING
PATTERN "*\.a"
PATTERN "CMakeFiles" EXCLUDE
PATTERN "cmake" EXCLUDE
PATTERN "gens" EXCLUDE
PATTERN "libs" EXCLUDE
PATTERN "third_party" EXCLUDE
)
else()
install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/curl/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/grpc/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/protobuf/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/c-ares/lib/
DESTINATION ${CMAKE_INSTALL_LIBDIR}
FILES_MATCHING
PATTERN "*\.lib"
PATTERN "CMakeFiles" EXCLUDE
PATTERN "cmake" EXCLUDE
PATTERN "gens" EXCLUDE
PATTERN "libs" EXCLUDE
PATTERN "third_party" EXCLUDE
)
endif() # NOT WIN32
install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/grpc/include/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/protobuf/include/
DESTINATION include
)
endif()
install(
FILES
${CMAKE_CURRENT_SOURCE_DIR}/grpc_client.h
DESTINATION include
)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
if(${TRITON_ENABLE_ZLIB})
find_package(ZLIB REQUIRED)
endif() # TRITON_ENABLE_ZLIB
#
# libhttpclient.so and libhttpclient_static.a
#
configure_file(libhttpclient.ldscript libhttpclient.ldscript COPYONLY)
# libhttpclient object build
set(
REQUEST_SRCS
http_client.cc common.cc cencode.c
)
set(
REQUEST_HDRS
http_client.h common.h ipc.h cencode.h
)
add_library(
http-client-library EXCLUDE_FROM_ALL OBJECT
${REQUEST_SRCS} ${REQUEST_HDRS}
)
if (NOT WIN32)
set_property(
SOURCE cencode.c
PROPERTY COMPILE_FLAGS -Wno-implicit-fallthrough
)
endif() # NOT WIN32
target_link_libraries(
http-client-library
PUBLIC
triton-common-json # from repo-common
)
# libhttpclient_static.a
add_library(
httpclient_static STATIC
$<TARGET_OBJECTS:http-client-library>
)
add_library(
TritonClient::httpclient_static ALIAS httpclient_static
)
target_link_libraries(
httpclient_static
PRIVATE triton-common-json
PUBLIC CURL::libcurl
PUBLIC Threads::Threads
)
if(${TRITON_ENABLE_ZLIB})
target_link_libraries(
httpclient_static
PRIVATE ZLIB::ZLIB
)
endif() # TRITON_ENABLE_ZLIB
# libhttpclient.so
add_library(
httpclient SHARED
$<TARGET_OBJECTS:http-client-library>
)
add_library(
TritonClient::httpclient ALIAS httpclient
)
if (NOT WIN32)
set_target_properties(
httpclient
PROPERTIES
LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libhttpclient.ldscript
LINK_FLAGS "-Wl,--version-script=libhttpclient.ldscript"
)
endif() # NOT WIN32
target_link_libraries(
httpclient
PRIVATE triton-common-json
PUBLIC CURL::libcurl
PUBLIC Threads::Threads
)
foreach(_client_target http-client-library httpclient_static httpclient)
target_compile_features(${_client_target} PRIVATE cxx_std_11)
target_compile_options(
${_client_target} PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
)
set_target_properties(
${_client_target}
PROPERTIES
POSITION_INDEPENDENT_CODE ON
)
target_include_directories(
${_client_target}
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<TARGET_PROPERTY:CURL::libcurl,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
)
target_compile_definitions(
${_client_target}
PRIVATE CURL_STATICLIB=1
)
if(TRITON_ENABLE_GPU)
target_compile_definitions(
${_client_target}
PUBLIC TRITON_ENABLE_GPU=1
)
endif() # TRITON_ENABLE_GPU
if(TRITON_ENABLE_GPU)
target_link_libraries(
${_client_target}
PUBLIC CUDA::cudart
)
endif() # TRITON_ENABLE_GPU
if(${TRITON_ENABLE_ZLIB})
target_compile_definitions(
${_client_target}
PUBLIC TRITON_ENABLE_ZLIB=1
)
endif() # TRITON_ENABLE_ZLIB
endforeach()
install(
FILES
${CMAKE_CURRENT_SOURCE_DIR}/http_client.h
DESTINATION include
)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
install(
FILES
${CMAKE_CURRENT_SOURCE_DIR}/common.h
${CMAKE_CURRENT_SOURCE_DIR}/ipc.h
DESTINATION include
)
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonClient)
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
install(
TARGETS
grpcclient
grpcclient_static
EXPORT
triton-client-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
install(
TARGETS
httpclient
httpclient_static
EXPORT
triton-client-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER
install(
EXPORT
triton-client-targets
FILE
TritonClientTargets.cmake
NAMESPACE
TritonClient::
DESTINATION
${INSTALL_CONFIGDIR}
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/TritonClientConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/TritonClientConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/TritonClientConfig.cmake
DESTINATION
${INSTALL_CONFIGDIR}
)
#
# Export from build tree
#
export(
EXPORT
triton-client-targets
FILE
${CMAKE_CURRENT_BINARY_DIR}/TritonClientTargets.cmake
NAMESPACE
TritonClient::
)
export(PACKAGE TritonClient)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
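#
# A downstream project would typically consume the installed package along
# these lines (a sketch; the executable name and source file are placeholders):
#
#   find_package(TritonClient REQUIRED)
#   add_executable(my_client my_client.cc)
#   target_link_libraries(my_client PRIVATE TritonClient::httpclient)
#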
/*
cencoder.c - c source to a base64 encoding algorithm implementation
This is part of the libb64 project, and has been placed in the public domain.
For details, see http://sourceforge.net/projects/libb64
*/
#include "cencode.h"
const int CHARS_PER_LINE = 72;
void
base64_init_encodestate(base64_encodestate* state_in)
{
state_in->step = step_A;
state_in->result = 0;
state_in->stepcount = 0;
}
char
base64_encode_value(char value_in)
{
static const char* encoding =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
if (value_in > 63)
return '=';
return encoding[(int)value_in];
}
int
base64_encode_block(
const char* plaintext_in, int length_in, char* code_out,
base64_encodestate* state_in)
{
const char* plainchar = plaintext_in;
const char* const plaintextend = plaintext_in + length_in;
char* codechar = code_out;
char result;
char fragment;
result = state_in->result;
switch (state_in->step) {
while (1) {
case step_A:
if (plainchar == plaintextend) {
state_in->result = result;
state_in->step = step_A;
return codechar - code_out;
}
fragment = *plainchar++;
result = (fragment & 0x0fc) >> 2;
*codechar++ = base64_encode_value(result);
result = (fragment & 0x003) << 4;
case step_B:
if (plainchar == plaintextend) {
state_in->result = result;
state_in->step = step_B;
return codechar - code_out;
}
fragment = *plainchar++;
result |= (fragment & 0x0f0) >> 4;
*codechar++ = base64_encode_value(result);
result = (fragment & 0x00f) << 2;
case step_C:
if (plainchar == plaintextend) {
state_in->result = result;
state_in->step = step_C;
return codechar - code_out;
}
fragment = *plainchar++;
result |= (fragment & 0x0c0) >> 6;
*codechar++ = base64_encode_value(result);
result = (fragment & 0x03f) >> 0;
*codechar++ = base64_encode_value(result);
++(state_in->stepcount);
if (state_in->stepcount == CHARS_PER_LINE / 4) {
*codechar++ = '\n';
state_in->stepcount = 0;
}
}
}
/* control should not reach here */
return codechar - code_out;
}
int
base64_encode_blockend(char* code_out, base64_encodestate* state_in)
{
char* codechar = code_out;
switch (state_in->step) {
case step_B:
*codechar++ = base64_encode_value(state_in->result);
*codechar++ = '=';
*codechar++ = '=';
break;
case step_C:
*codechar++ = base64_encode_value(state_in->result);
*codechar++ = '=';
break;
case step_A:
break;
}
*codechar++ = '\n';
return codechar - code_out;
}
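/*
Typical streaming use of this encoder (a sketch; buffer sizing is the
caller's responsibility -- base64 output is roughly 4/3 of the input plus
newlines and padding):

  base64_encodestate state;
  char out[128];
  base64_init_encodestate(&state);
  int n = base64_encode_block("hello", 5, out, &state);
  n += base64_encode_blockend(out + n, &state);
  // 'out' now holds n characters of base64 text ("aGVsbG8=\n").
*/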
/*
cencode.h - c header for a base64 encoding algorithm
This is part of the libb64 project, and has been placed in the public domain.
For details, see http://sourceforge.net/projects/libb64
*/
#ifndef BASE64_CENCODE_H
#define BASE64_CENCODE_H
typedef enum { step_A, step_B, step_C } base64_encodestep;
typedef struct {
base64_encodestep step;
char result;
int stepcount;
} base64_encodestate;
void base64_init_encodestate(base64_encodestate* state_in);
char base64_encode_value(char value_in);
int base64_encode_block(
const char* plaintext_in, int length_in, char* code_out,
base64_encodestate* state_in);
int base64_encode_blockend(char* code_out, base64_encodestate* state_in);
#endif /* BASE64_CENCODE_H */
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# specific version required for protobuf
if(NOT PROTOBUF_FOUND AND NOT Protobuf_FOUND)
set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE)
find_package(Protobuf @Protobuf_VERSION@ CONFIG REQUIRED)
endif()
# specific version required for grpc
if(NOT GRPC_FOUND AND NOT gRPC_FOUND)
find_package(gRPC @gRPC_VERSION@ CONFIG REQUIRED)
endif()
if(NOT CURL_FOUND)
if(${TRITON_CURL_WITHOUT_CONFIG})
find_package(CURL REQUIRED)
else()
find_package(CURL CONFIG REQUIRED)
endif() # TRITON_CURL_WITHOUT_CONFIG
endif()
if(NOT Threads_FOUND)
find_package(Threads REQUIRED)
endif()
if (NOT (TARGET TRITON::grpcclient OR TARGET TRITON::httpclient))
include ("${CMAKE_CURRENT_LIST_DIR}/TRITONTargets.cmake")
endif ()
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONCLIENT_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONCLIENT_CMAKE_DIR})
if(NOT TARGET TritonClient::triton-client)
include("${TRITONCLIENT_CMAKE_DIR}/TritonClientTargets.cmake")
endif()
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
namespace triton { namespace client {
//==============================================================================
const Error Error::Success("");
Error::Error(const std::string& msg) : msg_(msg) {}
std::ostream&
operator<<(std::ostream& out, const Error& err)
{
if (!err.msg_.empty()) {
out << err.msg_;
}
return out;
}
//==============================================================================
Error
InferenceServerClient::ClientInferStat(InferStat* infer_stat) const
{
*infer_stat = infer_stat_;
return Error::Success;
}
Error
InferenceServerClient::UpdateInferStat(const RequestTimers& timer)
{
const uint64_t request_time_ns = timer.Duration(
RequestTimers::Kind::REQUEST_START, RequestTimers::Kind::REQUEST_END);
const uint64_t send_time_ns = timer.Duration(
RequestTimers::Kind::SEND_START, RequestTimers::Kind::SEND_END);
const uint64_t recv_time_ns = timer.Duration(
RequestTimers::Kind::RECV_START, RequestTimers::Kind::RECV_END);
if ((request_time_ns == std::numeric_limits<uint64_t>::max()) ||
(send_time_ns == std::numeric_limits<uint64_t>::max()) ||
(recv_time_ns == std::numeric_limits<uint64_t>::max())) {
return Error(
"Timer not set correctly." +
((timer.Timestamp(RequestTimers::Kind::REQUEST_START) >
timer.Timestamp(RequestTimers::Kind::REQUEST_END))
? (" Request time from " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::REQUEST_START)) +
" to " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::REQUEST_END)) +
".")
: "") +
((timer.Timestamp(RequestTimers::Kind::SEND_START) >
timer.Timestamp(RequestTimers::Kind::SEND_END))
? (" Send time from " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::SEND_START)) +
" to " +
std::to_string(timer.Timestamp(RequestTimers::Kind::SEND_END)) +
".")
: "") +
((timer.Timestamp(RequestTimers::Kind::RECV_START) >
timer.Timestamp(RequestTimers::Kind::RECV_END))
? (" Receive time from " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::RECV_START)) +
" to " +
std::to_string(timer.Timestamp(RequestTimers::Kind::RECV_END)) +
".")
: ""));
}
infer_stat_.completed_request_count++;
infer_stat_.cumulative_total_request_time_ns += request_time_ns;
infer_stat_.cumulative_send_time_ns += send_time_ns;
infer_stat_.cumulative_receive_time_ns += recv_time_ns;
return Error::Success;
}
//==============================================================================
Error
InferInput::Create(
InferInput** infer_input, const std::string& name,
const std::vector<int64_t>& dims, const std::string& datatype)
{
*infer_input = new InferInput(name, dims, datatype);
return Error::Success;
}
Error
InferInput::SetShape(const std::vector<int64_t>& shape)
{
shape_ = shape;
return Error::Success;
}
Error
InferInput::Reset()
{
bufs_.clear();
buf_byte_sizes_.clear();
str_bufs_.clear();
bufs_idx_ = 0;
byte_size_ = 0;
io_type_ = NONE;
return Error::Success;
}
Error
InferInput::AppendRaw(const std::vector<uint8_t>& input)
{
return AppendRaw(&input[0], input.size());
}
Error
InferInput::AppendRaw(const uint8_t* input, size_t input_byte_size)
{
byte_size_ += input_byte_size;
bufs_.push_back(input);
buf_byte_sizes_.push_back(input_byte_size);
io_type_ = RAW;
return Error::Success;
}
Error
InferInput::SetSharedMemory(
const std::string& name, size_t byte_size, size_t offset)
{
shm_name_ = name;
shm_offset_ = offset;
byte_size_ = byte_size;
io_type_ = SHARED_MEMORY;
return Error::Success;
}
Error
InferInput::AppendFromString(const std::vector<std::string>& input)
{
// Serialize the strings into a "raw" buffer. The first 4-bytes are
// the length of the string length. Next are the actual string
// characters. There is *not* a null-terminator on the string.
str_bufs_.emplace_back();
std::string& sbuf = str_bufs_.back();
for (const auto& str : input) {
uint32_t len = str.size();
sbuf.append(reinterpret_cast<const char*>(&len), sizeof(uint32_t));
sbuf.append(str);
}
return AppendRaw(reinterpret_cast<const uint8_t*>(&sbuf[0]), sbuf.size());
}
Error
InferInput::ByteSize(size_t* byte_size) const
{
*byte_size = byte_size_;
return Error::Success;
}
InferInput::InferInput(
const std::string& name, const std::vector<int64_t>& shape,
const std::string& datatype)
: name_(name), shape_(shape), datatype_(datatype), byte_size_(0),
bufs_idx_(0), buf_pos_(0), io_type_(NONE), shm_name_(""), shm_offset_(0)
{
}
Error
InferInput::SharedMemoryInfo(
std::string* name, size_t* byte_size, size_t* offset) const
{
if (io_type_ != SHARED_MEMORY) {
return Error("The input has not been set with the shared memory.");
}
*name = shm_name_;
*offset = shm_offset_;
*byte_size = byte_size_;
return Error::Success;
}
Error
InferInput::SetBinaryData(const bool binary_data)
{
binary_data_ = binary_data;
return Error::Success;
}
Error
InferInput::PrepareForRequest()
{
// Reset position so request sends entire input.
bufs_idx_ = 0;
buf_pos_ = 0;
return Error::Success;
}
Error
InferInput::GetNext(
uint8_t* buf, size_t size, size_t* input_bytes, bool* end_of_input)
{
size_t total_size = 0;
while ((bufs_idx_ < bufs_.size()) && (size > 0)) {
const size_t buf_byte_size = buf_byte_sizes_[bufs_idx_];
const size_t csz = (std::min)(buf_byte_size - buf_pos_, size);
if (csz > 0) {
const uint8_t* input_ptr = bufs_[bufs_idx_] + buf_pos_;
std::copy(input_ptr, input_ptr + csz, buf);
buf_pos_ += csz;
buf += csz;
size -= csz;
total_size += csz;
}
if (buf_pos_ == buf_byte_size) {
bufs_idx_++;
buf_pos_ = 0;
}
}
*input_bytes = total_size;
*end_of_input = (bufs_idx_ >= bufs_.size());
return Error::Success;
}
Error
InferInput::GetNext(
const uint8_t** buf, size_t* input_bytes, bool* end_of_input)
{
if (bufs_idx_ < bufs_.size()) {
*buf = bufs_[bufs_idx_];
*input_bytes = buf_byte_sizes_[bufs_idx_];
bufs_idx_++;
} else {
*buf = nullptr;
*input_bytes = 0;
}
*end_of_input = (bufs_idx_ >= bufs_.size());
return Error::Success;
}
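// The two GetNext() overloads serve different transports: the first copies
// input data into a caller-provided buffer (possibly spanning several
// appended buffers), while the second hands back a pointer to one internal
// buffer at a time without copying.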
//==============================================================================
Error
InferRequestedOutput::Create(
InferRequestedOutput** infer_output, const std::string& name,
const size_t class_count)
{
*infer_output = new InferRequestedOutput(name, class_count);
return Error::Success;
}
Error
InferRequestedOutput::SetSharedMemory(
const std::string& region_name, const size_t byte_size, const size_t offset)
{
shm_name_ = region_name;
shm_byte_size_ = byte_size;
shm_offset_ = offset;
io_type_ = SHARED_MEMORY;
return Error::Success;
}
Error
InferRequestedOutput::UnsetSharedMemory()
{
shm_name_ = "";
shm_byte_size_ = 0;
shm_offset_ = 0;
io_type_ = NONE;
return Error::Success;
}
InferRequestedOutput::InferRequestedOutput(
const std::string& name, const size_t class_count)
: name_(name), class_count_(class_count), io_type_(NONE)
{
}
Error
InferRequestedOutput::SharedMemoryInfo(
std::string* name, size_t* byte_size, size_t* offset) const
{
if (io_type_ != SHARED_MEMORY) {
return Error("The input has not been set with the shared memory.");
}
*name = shm_name_;
*byte_size = shm_byte_size_;
*offset = shm_offset_;
return Error::Success;
}
Error
InferRequestedOutput::SetBinaryData(const bool binary_data)
{
binary_data_ = binary_data;
return Error::Success;
}
//==============================================================================
}} // namespace triton::client