Commit c68e1835 authored by lijian6

Initial commit
---
BasedOnStyle: Google
IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
#VSCode
/.vscode
src/.vscode
src/c++/.vscode
src/python/.vscode
#C++
/build
*.so
src/c++/perf_analyzer/builddir/
src/c++/perf_analyzer/.vscode/
#Python
__pycache__/
*.pyc
#Other
node_modules
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
exclude: ^src/grpc_generated
repos:
  - repo: https://github.com/timothycrosley/isort
    rev: 5.12.0
    hooks:
        - id: isort
          additional_dependencies: [toml]
  - repo: https://github.com/psf/black
    rev: 23.1.0
    hooks:
        - id: black
          types_or: [python, cython]
  - repo: https://github.com/PyCQA/flake8
    rev: 5.0.4
    hooks:
        - id: flake8
          args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore=E203,E501]
          types_or: [python, cython]
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v16.0.5
    hooks:
        - id: clang-format
          types_or: [c, c++, cuda, proto, textproto, java]
          args: ["-fallback-style=none", "-style=file", "-i"]
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
        - id: codespell
          additional_dependencies: [tomli]
          args: ["--toml", "pyproject.toml"]
          exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
  # More details about these pre-commit hooks here:
  # https://pre-commit.com/hooks.html
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
        - id: check-case-conflict
        - id: check-executables-have-shebangs
        - id: check-merge-conflict
        - id: check-json
        - id: check-toml
        - id: check-yaml
        - id: check-shebang-scripts-are-executable
        - id: end-of-file-fixer
          types_or: [c, c++, cuda, proto, textproto, java, python]
        - id: mixed-line-ending
        - id: requirements-txt-fixer
        - id: trailing-whitespace
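# A typical local workflow for these hooks (assuming pre-commit is installed,
# e.g. via `pip install pre-commit`):
#   pre-commit install          # register the hooks with git
#   pre-commit run --all-files  # run every hook against the whole tree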
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(tritonclient LANGUAGES C CXX)
#
# Options
#
set(TRITON_VERSION "0.0.0" CACHE STRING "Version for the clients")
set(PERF_ANALYZER_VERSION ${TRITON_VERSION} CACHE STRING "Build Version for Perf Analyzer")
option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF)
option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF)
option(TRITON_ENABLE_PYTHON_HTTP "Enable Python HTTP client libraries" OFF)
option(TRITON_ENABLE_PYTHON_GRPC "Enable Python GRPC client libraries" OFF)
option(TRITON_ENABLE_JAVA_HTTP "Enable JAVA HTTP client libraries" OFF)
option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
option(TRITON_ENABLE_ZLIB "Include ZLIB library in build" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/third_party repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
if(NOT TRITON_ENABLE_PYTHON_GRPC)
set(TRITON_COMMON_ENABLE_PROTOBUF_PYTHON OFF)
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-third-party
GIT_REPOSITORY https://github.com/triton-inference-server/third_party.git
GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
GIT_SHALLOW ON
)
set(TRITON_THIRD_PARTY_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/third-party)
FetchContent_MakeAvailable(repo-third-party)
# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on CentOS
set (LIB_DIR "lib")
# /etc/os-release does not exist on Windows
if(EXISTS "/etc/os-release")
file(STRINGS /etc/os-release DISTRO REGEX "^NAME=")
string(REGEX REPLACE "NAME=\"(.*)\"" "\\1" DISTRO "${DISTRO}")
message(STATUS "Distro Name: ${DISTRO}")
if(DISTRO MATCHES "CentOS.*")
set (LIB_DIR "lib64")
endif()
endif()
# Need to use ExternalProject for our builds so that we can get the
# correct dependencies between our components and the ExternalProject
# dependencies (found in the third_party repo)
include(ExternalProject)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/cc-clients/install)
else()
set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
endif()
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
if (OPENSSL_ROOT_DIR)
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
endif()
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
if (CMAKE_TOOLCHAIN_FILE)
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
endif()
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
if (VCPKG_TARGET_TRIPLET)
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
endif()
# Location where protobuf-config.cmake will be installed varies by
# platform
if (WIN32)
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
else()
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf")
endif()
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_PERF_ANALYZER_C_API)
set(_cc_client_depends "")
if(${TRITON_ENABLE_CC_HTTP})
set(_cc_client_depends ${_cc_client_depends} curl)
endif() # TRITON_ENABLE_CC_HTTP
if(${TRITON_ENABLE_CC_GRPC} OR ${TRITON_ENABLE_PERF_ANALYZER})
set(_cc_client_depends ${_cc_client_depends} grpc protobuf)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_C_API})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_C_API=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_C_API
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TFS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TFS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TFS
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS
ExternalProject_Add(cc-clients
PREFIX cc-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/c++"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/cc-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DCURL_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
-DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
-DPERF_ANALYZER_VERSION:STRING=${PERF_ANALYZER_VERSION}
-DTRITON_ENABLE_CC_HTTP:BOOL=${TRITON_ENABLE_CC_HTTP}
-DTRITON_ENABLE_CC_GRPC:BOOL=${TRITON_ENABLE_CC_GRPC}
-DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER}
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DTRITON_ENABLE_ZLIB:BOOL=${TRITON_ENABLE_ZLIB}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
DEPENDS ${_cc_client_depends}
)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC)
set(_py_client_depends "")
if(${TRITON_ENABLE_PYTHON_GRPC})
set(_py_client_depends ${_py_client_depends} grpc protobuf)
endif() # TRITON_ENABLE_PYTHON_GRPC
if(${TRITON_ENABLE_PERF_ANALYZER})
set(_py_client_depends ${_py_client_depends} cc-clients)
endif() # TRITON_ENABLE_PERF_ANALYZER
ExternalProject_Add(python-clients
PREFIX python-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/python"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/python-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
-DTRITON_ENABLE_PYTHON_HTTP:BOOL=${TRITON_ENABLE_PYTHON_HTTP}
-DTRITON_ENABLE_PYTHON_GRPC:BOOL=${TRITON_ENABLE_PYTHON_GRPC}
-DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER}
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
DEPENDS ${_py_client_depends}
)
endif() # TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC
if(TRITON_ENABLE_JAVA_HTTP)
ExternalProject_Add(java-clients
PREFIX java-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/java"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/java-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
-DTRITON_ENABLE_JAVA_HTTP:BOOL=${TRITON_ENABLE_JAVA_HTTP}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
INSTALL_COMMAND ""
)
endif() # TRITON_ENABLE_JAVA_HTTP
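# A minimal, illustrative configure-and-build invocation using the options
# defined above (the build directory name and the chosen feature flags are
# only an example, not a required configuration):
#
#   cmake -S . -B build \
#     -DTRITON_ENABLE_CC_HTTP=ON \
#     -DTRITON_ENABLE_CC_GRPC=ON \
#     -DTRITON_ENABLE_EXAMPLES=ON
#   cmake --build build --parallel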
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
[tool.codespell]
# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
# this is only to allow you to run codespell interactively
# this also overrides the grpc_generated folder, since it is generated
skip = "./.git,./.github,./src/grpc_generated"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# ignore allowed words
# ignoring atleast to avoid testing::AtLeast from getting flagged
ignore-words-list = "atleast"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# disable warnings about binary files and wrong encoding
quiet-level = 3
[tool.isort]
profile = "black"
use_parentheses = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
ensure_newline_before_comments = true
line_length = 88
balanced_wrapping = true
indent = " "
skip = ["build"]
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(cc-clients LANGUAGES C CXX)
#
# Options
#
option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF)
option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF)
option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
option(TRITON_USE_THIRD_PARTY "Use local version of third party libraries" ON)
option(TRITON_KEEP_TYPEINFO "Keep typeinfo symbols by disabling ldscript" OFF)
option(TRITON_ENABLE_ZLIB "Include ZLIB library in build" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-common
GIT_REPOSITORY https://github.com/triton-inference-server/common.git
GIT_TAG ${TRITON_COMMON_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/9406a60c7839052e4944ea4dbc8344762a89f9bd.zip
)
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
set(TRITON_COMMON_ENABLE_PROTOBUF ON)
set(TRITON_COMMON_ENABLE_GRPC ON)
if(TRITON_ENABLE_PERF_ANALYZER)
FetchContent_Declare(
repo-core
GIT_REPOSITORY https://github.com/triton-inference-server/core.git
GIT_TAG ${TRITON_CORE_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-core)
endif() # TRITON_ENABLE_PERF_ANALYZER
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT TRITON_ENABLE_PERF_ANALYZER AND NOT TRITON_ENABLE_CC_HTTP AND NOT TRITON_ENABLE_EXAMPLES)
set(TRITON_COMMON_ENABLE_JSON OFF)
endif()
if(TRITON_ENABLE_TESTS OR TRITON_ENABLE_PERF_ANALYZER)
FetchContent_MakeAvailable(googletest)
endif()
FetchContent_MakeAvailable(repo-common)
if(TRITON_ENABLE_TESTS)
include_directories(
${repo-common_SOURCE_DIR}/include
)
endif() # TRITON_ENABLE_TESTS
#
# CUDA
#
if(TRITON_ENABLE_GPU)
find_package(CUDAToolkit REQUIRED)
endif() # TRITON_ENABLE_GPU
#
# libcurl
#
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
find_package(CURL REQUIRED)
message(STATUS "Using curl ${CURL_VERSION}")
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER
#
# Protobuf
#
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE)
find_package(Protobuf CONFIG REQUIRED)
message(STATUS "Using protobuf ${Protobuf_VERSION}")
include_directories(${Protobuf_INCLUDE_DIRS})
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
#
# GRPC
#
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
find_package(gRPC CONFIG REQUIRED)
message(STATUS "Using gRPC ${gRPC_VERSION}")
include_directories($<TARGET_PROPERTY:gRPC::grpc,INTERFACE_INCLUDE_DIRECTORIES>)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
add_subdirectory(library)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC)
if(TRITON_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif() # TRITON_ENABLE_EXAMPLES
if(TRITON_ENABLE_TESTS)
add_subdirectory(tests)
endif() # TRITON_ENABLE_TESTS
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_PERF_ANALYZER)
add_subdirectory(perf_analyzer)
endif() # TRITON_ENABLE_PERF_ANALYZER
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required (VERSION 3.18)
if(WIN32)
message("C++ examples are not currently supported on Windows because "
"they require functionalities that are UNIX specific.")
else()
if(TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC)
#
# yolov7-tiny
#
find_package(OpenCV REQUIRED)
add_executable(
yolov7-tiny
yolov7-tiny.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
yolov7-tiny
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
yolov7-tiny
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS yolov7-tiny
RUNTIME DESTINATION bin
)
#
# resnet50
#
find_package(OpenCV REQUIRED)
add_executable(
resnet50
resnet50.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
resnet50
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
resnet50
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS resnet50
RUNTIME DESTINATION bin
)
#
# image_client
#
find_package(OpenCV REQUIRED)
add_executable(
image_client
image_client.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
image_client
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
image_client
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS image_client
RUNTIME DESTINATION bin
)
#
# ensemble_image_client
#
add_executable(
ensemble_image_client
ensemble_image_client.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
ensemble_image_client
PRIVATE
grpcclient_static
httpclient_static
)
install(
TARGETS ensemble_image_client
RUNTIME DESTINATION bin
)
#
# reuse_infer_objects_client
#
add_executable(
reuse_infer_objects_client
reuse_infer_objects_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
reuse_infer_objects_client
PRIVATE
grpcclient_static
httpclient_static
)
install(
TARGETS reuse_infer_objects_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_CC_GRPC)
#
# simple_grpc_health_metadata
#
add_executable(simple_grpc_health_metadata simple_grpc_health_metadata.cc)
target_link_libraries(
simple_grpc_health_metadata
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_health_metadata
RUNTIME DESTINATION bin
)
#
# simple_grpc_model_control
#
add_executable(simple_grpc_model_control simple_grpc_model_control.cc)
target_link_libraries(
simple_grpc_model_control
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_model_control
RUNTIME DESTINATION bin
)
#
# simple_grpc_infer_client
#
add_executable(simple_grpc_infer_client simple_grpc_infer_client.cc)
target_link_libraries(
simple_grpc_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_keepalive_client
#
add_executable(simple_grpc_keepalive_client simple_grpc_keepalive_client.cc)
target_link_libraries(
simple_grpc_keepalive_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_keepalive_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_custom_args_client
#
add_executable(simple_grpc_custom_args_client simple_grpc_custom_args_client.cc)
target_link_libraries(
simple_grpc_custom_args_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_custom_args_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_string_infer_client
#
add_executable(simple_grpc_string_infer_client simple_grpc_string_infer_client.cc)
target_link_libraries(
simple_grpc_string_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_string_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_async_infer_client
#
add_executable(simple_grpc_async_infer_client simple_grpc_async_infer_client.cc)
target_link_libraries(
simple_grpc_async_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_async_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_sequence_stream_infer_client
#
add_executable(simple_grpc_sequence_stream_infer_client simple_grpc_sequence_stream_infer_client.cc)
target_link_libraries(
simple_grpc_sequence_stream_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_sequence_stream_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_sequence_sync_infer_client
#
add_executable(simple_grpc_sequence_sync_infer_client simple_grpc_sequence_sync_infer_client.cc)
target_link_libraries(
simple_grpc_sequence_sync_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_sequence_sync_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_shm_client
#
add_executable(
simple_grpc_shm_client
simple_grpc_shm_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
simple_grpc_shm_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_shm_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_custom_repeat
#
add_executable(simple_grpc_custom_repeat simple_grpc_custom_repeat.cc)
target_link_libraries(
simple_grpc_custom_repeat
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_custom_repeat
RUNTIME DESTINATION bin
)
if(${TRITON_ENABLE_GPU})
#
# simple_grpc_cudashm_client
#
set(
SIMPLE_GRPC_CUDA_SHM_SRCS
simple_grpc_cudashm_client.cc
)
set(
SIMPLE_GRPC_CUDA_SHM_HDRS
)
add_executable(simple_grpc_cudashm_client ${SIMPLE_GRPC_CUDA_SHM_SRCS} ${SIMPLE_GRPC_CUDA_SHM_HDRS})
target_include_directories(simple_grpc_cudashm_client PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(
simple_grpc_cudashm_client
PRIVATE
grpcclient_static
${CUDA_LIBRARIES}
)
install(
TARGETS simple_grpc_cudashm_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_GPU
endif() # TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_CC_HTTP)
#
# simple_http_health_metadata
#
add_executable(
simple_http_health_metadata
simple_http_health_metadata.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
simple_http_health_metadata
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_health_metadata
RUNTIME DESTINATION bin
)
#
# simple_http_model_control
#
add_executable(
simple_http_model_control
simple_http_model_control.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
simple_http_model_control
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_model_control
RUNTIME DESTINATION bin
)
#
# simple_http_infer_client
#
add_executable(simple_http_infer_client simple_http_infer_client.cc)
target_link_libraries(
simple_http_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_string_infer_client
#
add_executable(simple_http_string_infer_client simple_http_string_infer_client.cc)
target_link_libraries(
simple_http_string_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_string_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_async_infer_client
#
add_executable(simple_http_async_infer_client simple_http_async_infer_client.cc)
target_link_libraries(
simple_http_async_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_async_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_sequence_sync_infer_client
#
add_executable(simple_http_sequence_sync_infer_client simple_http_sequence_sync_infer_client.cc)
target_link_libraries(
simple_http_sequence_sync_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_sequence_sync_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_shm_client
#
add_executable(
simple_http_shm_client
simple_http_shm_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
simple_http_shm_client
PRIVATE
httpclient_static
rt
)
install(
TARGETS simple_http_shm_client
RUNTIME DESTINATION bin
)
if(${TRITON_ENABLE_GPU})
#
# simple_http_cudashm_client
#
set(
SIMPLE_HTTP_CUDA_SHM_SRCS
simple_http_cudashm_client.cc
)
set(
SIMPLE_HTTP_CUDA_SHM_HDRS
)
add_executable(simple_http_cudashm_client ${SIMPLE_HTTP_CUDA_SHM_SRCS} ${SIMPLE_HTTP_CUDA_SHM_HDRS})
target_include_directories(simple_http_cudashm_client PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(
simple_http_cudashm_client
PRIVATE
httpclient_static
${CUDA_LIBRARIES}
)
install(
TARGETS simple_http_cudashm_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_GPU
endif() # TRITON_ENABLE_CC_HTTP
endif() # WIN32
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const size_t topk)
{
std::string output_name("OUTPUT");
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
// Get and validate the shape and datatype
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
// Validate shape
if ((shape.size() != 2) || (shape[0] != (int)batch_size) ||
(shape[1] != (int)topk)) {
std::cerr << "received incorrect shapes for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "received incorrect datatype for " << output_name << ": "
<< datatype << std::endl;
exit(1);
}
std::vector<std::string> result_data;
err = result->StringData(output_name, &result_data);
if (!err.IsOk()) {
std::cerr << "unable to get data for " << output_name << std::endl;
exit(1);
}
if (result_data.size() != (topk * batch_size)) {
std::cerr << "unexpected number of strings in the result, expected "
<< (topk * batch_size) << ", got " << result_data.size()
<< std::endl;
exit(1);
}
size_t index = 0;
for (size_t b = 0; b < batch_size; ++b) {
std::cout << "Image '" << filenames[b] << "':" << std::endl;
for (size_t c = 0; c < topk; ++c) {
std::istringstream is(result_data[index]);
int count = 0;
std::string token;
while (getline(is, token, ':')) {
if (count == 0) {
std::cout << " " << token;
} else if (count == 1) {
std::cout << " (" << token << ")";
} else if (count == 2) {
std::cout << " = " << token;
}
count++;
}
std::cout << std::endl;
index++;
}
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0]
<< " [options] <image filename / image folder>" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-c <topk>" << std::endl;
std::cerr << "\t-i <Protocol used to communicate with inference service>"
<< std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -c, the <topk> classes will be returned, default is 1."
<< std::endl;
std::cerr
<< "For -i, available protocols are 'grpc' and 'http'. Default is 'http'."
<< std::endl;
exit(1);
}
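// Only one of the two protocol clients is actually created at run time; the
// union keeps a single handle whether HTTP or gRPC is selected via '-i'.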
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
std::string protocol = "http";
size_t topk = 1;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vi:u:p:c:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'i':
protocol = optarg;
break;
case 'u':
url = optarg;
break;
case 'c':
topk = std::atoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
if (topk <= 0) {
Usage(argv, "topk must be > 0");
}
// The ensemble model takes 1 input tensor with shape [ 1 ] and STRING
// data type and returns 1 output tensor as top k (see '-c' flag)
// classification result of the input.
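// For example, a hypothetical invocation that requests the top-3 classes over
// the default HTTP endpoint (localhost:8000):
//   ./ensemble_image_client -c 3 /path/to/image_or_folder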
std::string model_name = "preprocess_inception_ensemble";
// Create the inference client for the model.
TritonClient triton_client;
tc::Error err;
if (protocol == "http") {
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
} else {
err = tc::InferenceServerGrpcClient::Create(
&triton_client.grpc_client_, url, verbose);
}
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err
<< std::endl;
exit(1);
}
if (optind >= argc) {
Usage(argv, "image file or image folder must be specified");
}
if (!err.IsOk()) {
std::cerr << "error: unable to create inference context: " << err
<< std::endl;
exit(1);
}
// Obtain a list of the image names to be processed
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(argv[optind], &name_stat) != 0) {
std::cerr << "Failed to find '" << std::string(argv[optind])
<< "': " << strerror(errno) << std::endl;
exit(1);
}
if (name_stat.st_mode & S_IFDIR) {
const std::string dirname = argv[optind];
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(argv[optind]);
}
// Sort the filenames so that we always visit them in the same order
// (readdir does not guarantee any particular order).
std::sort(image_filenames.begin(), image_filenames.end());
// Read the raw image as string
std::vector<std::vector<std::string>> images;
for (const auto& fn : image_filenames) {
images.emplace_back();
auto& image_str = images.back();
std::ifstream file(fn);
file >> std::noskipws;
image_str.emplace_back(
(std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
if (image_str.back().empty()) {
std::cerr << "error: unable to read image file " << fn << std::endl;
exit(1);
}
}
// This client only sends one request for simplicity, so the maximum number
// of images that can be processed is limited by the model's maximum batch size.
size_t batch_size = 0;
if (protocol == "http") {
std::string model_config;
err = triton_client.http_client_->ModelConfig(&model_config, model_name);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
const auto bs_itr = model_config_json.FindMember("max_batch_size");
if (bs_itr != model_config_json.MemberEnd()) {
batch_size = bs_itr->value.GetInt();
}
} else {
inference::ModelConfigResponse model_config;
err = triton_client.grpc_client_->ModelConfig(&model_config, model_name);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
batch_size = model_config.config().max_batch_size();
}
if (images.size() > batch_size) {
std::cerr << "The number of images exceeds maximum batch size, only the"
<< " first " << batch_size << " images, sorted by name"
<< " alphabetically, will be processed" << std::endl;
}
batch_size = (images.size() < batch_size) ? images.size() : batch_size;
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{(int64_t)batch_size, 1};
err = tc::InferInput::Create(&input, "INPUT", shape, "BYTES");
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
// Set the number of classification expected
err = tc::InferRequestedOutput::Create(&output, "OUTPUT", topk);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
FAIL_IF_ERR(input_ptr->Reset(), "unable to reset INPUT");
for (size_t i = 0; i < batch_size; i++) {
FAIL_IF_ERR(
input_ptr->AppendFromString(images[i]), "unable to set data for INPUT");
}
// Send inference request to the inference server.
tc::InferResult* results;
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->Infer(&results, options, inputs, outputs),
"unable to run model");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->Infer(&results, options, inputs, outputs),
"unable to run model");
}
std::unique_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Print classification results
Postprocess(std::move(results_ptr), image_filenames, batch_size, topk);
return 0;
}
#include <dirent.h>
#include <getopt.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <algorithm>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <queue>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core/version.hpp>
#if CV_MAJOR_VERSION == 2
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#elif CV_MAJOR_VERSION >= 3
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#endif
#if CV_MAJOR_VERSION == 4
#define GET_TRANSFORMATION_CODE(x) cv::COLOR_##x
#else
#define GET_TRANSFORMATION_CODE(x) CV_##x
#endif
using namespace cv;
namespace tc = triton::client;
namespace {
enum ProtocolType { HTTP = 0, GRPC = 1 };
struct ModelInfo {
std::string output_name_;
std::string input_name_;
std::string input_datatype_;
int input_c_;
int input_h_;
int input_w_;
std::string input_format_;
int type1_;
int type3_;
int max_batch_size_;
};
std::vector<float> ComputeSoftmax(const std::vector<float>& results)
{
  // Subtract the maximum logit before exponentiating for numerical stability.
  float max_value = -std::numeric_limits<float>::max();
  for (size_t i = 0; i < results.size(); ++i) {
    if (results[i] > max_value) {
      max_value = results[i];
    }
  }
  std::vector<float> softmax_results(results.size());
  float sum = 0.0f;
  for (size_t i = 0; i < results.size(); ++i) {
    softmax_results[i] = std::exp(results[i] - max_value);
    sum += softmax_results[i];
  }
  for (size_t i = 0; i < results.size(); ++i) {
    softmax_results[i] /= sum;
  }
  return softmax_results;
}
void
Preprocess(
const std::string& filename, int img_type1, int img_type3, size_t img_channels,
const cv::Size& img_size, std::vector<uint8_t>* input_data)
{
cv::Mat img = cv::imread(filename, 1);
if (img.empty()) {
std::cerr << "error: unable to decode image " << filename << std::endl;
exit(1);
}
cv::Mat sample;
if ((img.channels() == 3) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGR2RGB));
} else {
std::cerr << "unexpected number of channels " << img.channels()
<< " in input image, model expects " << img_channels << "."
<< std::endl;
exit(1);
}
cv::Mat sample_resized;
cv::resize(sample, sample_resized, img_size);
cv::Mat sample_type;
sample_resized.convertTo(sample_type, (img_channels == 3) ? img_type3 : img_type1);
cv::Mat sample_final;
sample_final = sample_type.mul(cv::Scalar(1/58.395, 1/57.12, 1/57.375));
sample_final = sample_final - cv::Scalar(123.675, 116.28, 103.53);
size_t img_byte_size = sample_final.total() * sample_final.elemSize();
size_t pos = 0;
input_data->resize(img_byte_size);
std::vector<cv::Mat> input_bgr_channels;
for (size_t i = 0; i < img_channels; ++i) {
input_bgr_channels.emplace_back(img_size.height, img_size.width, img_type1, &((*input_data)[pos]));
pos += input_bgr_channels.back().total() * input_bgr_channels.back().elemSize();
}
cv::split(sample_final, input_bgr_channels);
if (pos != img_byte_size) {
std::cerr << "unexpected total size of channels " << pos << ", expecting "
<< img_byte_size << std::endl;
exit(1);
}
}
void Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const std::string& output_name, const bool batching)
{
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
// Get and validate the shape and datatype
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
const uint8_t* result_data;
size_t outputCount = 0;
err = result->RawData(output_name, &result_data, &outputCount);
if (!err.IsOk()) {
std::cerr << "unable to get data for " << output_name << std::endl;
exit(1);
}
  // The raw output buffer holds FP32 logits; outputCount is its size in bytes.
  const float* pdata = reinterpret_cast<const float*>(result_data);
  const size_t num_classes = outputCount / sizeof(float);
  std::vector<float> logit(pdata, pdata + num_classes);
  std::vector<float> probs = ComputeSoftmax(logit);
  for (size_t j = 0; j < probs.size(); ++j) {
    if (probs[j] >= 0.5) {
      fprintf(stdout, "label:%zu,confidence:%.3f\n", j, probs[j]);
    }
  }
}
bool ParseType(const std::string& dtype, int* type1, int* type3)
{
if (dtype.compare("UINT8") == 0) {
*type1 = CV_8UC1;
*type3 = CV_8UC3;
} else if (dtype.compare("INT8") == 0) {
*type1 = CV_8SC1;
*type3 = CV_8SC3;
} else if (dtype.compare("UINT16") == 0) {
*type1 = CV_16UC1;
*type3 = CV_16UC3;
} else if (dtype.compare("INT16") == 0) {
*type1 = CV_16SC1;
*type3 = CV_16SC3;
} else if (dtype.compare("INT32") == 0) {
*type1 = CV_32SC1;
*type3 = CV_32SC3;
} else if (dtype.compare("FP32") == 0) {
*type1 = CV_32FC1;
*type3 = CV_32FC3;
} else if (dtype.compare("FP64") == 0) {
*type1 = CV_64FC1;
*type3 = CV_64FC3;
} else {
return false;
}
return true;
}
void ParseModelHttp(
const rapidjson::Document& model_metadata,
const rapidjson::Document& model_config, const size_t batch_size,
ModelInfo* model_info)
{
const auto& input_itr = model_metadata.FindMember("inputs");
size_t input_count = 0;
if (input_itr != model_metadata.MemberEnd()) {
input_count = input_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input, got " << input_count << std::endl;
exit(1);
}
const auto& output_itr = model_metadata.FindMember("outputs");
size_t output_count = 0;
if (output_itr != model_metadata.MemberEnd()) {
output_count = output_itr->value.Size();
}
if (output_count != 1) {
std::cerr << "expecting 1 output, got " << output_count << std::endl;
exit(1);
}
const auto& input_config_itr = model_config.FindMember("input");
input_count = 0;
if (input_config_itr != model_config.MemberEnd()) {
input_count = input_config_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input in model configuration, got " << input_count
<< std::endl;
exit(1);
}
const auto& input_metadata = *input_itr->value.Begin();
const auto& input_config = *input_config_itr->value.Begin();
const auto& output_metadata = *output_itr->value.Begin();
const auto& output_dtype_itr = output_metadata.FindMember("datatype");
if (output_dtype_itr == output_metadata.MemberEnd()) {
std::cerr << "output missing datatype in the metadata for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
auto datatype = std::string(output_dtype_itr->value.GetString(),
output_dtype_itr->value.GetStringLength());
if (datatype.compare("FP32") != 0) {
std::cerr << "expecting output datatype to be FP32, model '"
<< model_metadata["name"].GetString() << "' output type is '"
<< datatype << "'" << std::endl;
exit(1);
}
int max_batch_size = 0;
const auto bs_itr = model_config.FindMember("max_batch_size");
if (bs_itr != model_config.MemberEnd()) {
max_batch_size = bs_itr->value.GetUint();
}
model_info->max_batch_size_ = max_batch_size;
if (max_batch_size == 0) {
if (batch_size != 1) {
std::cerr << "batching not supported for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
} else {
if (batch_size > (size_t)max_batch_size) {
std::cerr << "expecting batch size <= " << max_batch_size
<< " for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
const bool input_batch_dim = (max_batch_size == 0);
const size_t expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
const auto input_shape_itr = input_metadata.FindMember("shape");
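// Note: the shape indexing below assumes the reported input shape has four
// dimensions laid out as [batch, channels, height, width] (NCHW-style).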
model_info->input_format_ = std::string(input_config["format"].GetString(), input_config["format"].GetStringLength());
model_info->output_name_ = std::string(output_metadata["name"].GetString(), output_metadata["name"].GetStringLength());
model_info->input_name_ = std::string(input_metadata["name"].GetString(), input_metadata["name"].GetStringLength());
model_info->input_datatype_ = std::string(input_metadata["datatype"].GetString(), input_metadata["datatype"].GetStringLength());
model_info->input_c_ = input_shape_itr->value[1].GetInt();
model_info->input_h_ = input_shape_itr->value[2].GetInt();
model_info->input_w_ = input_shape_itr->value[3].GetInt();
if (!ParseType(model_info->input_datatype_, &(model_info->type1_), &(model_info->type3_))) {
std::cerr << "unexpected input datatype '" << model_info->input_datatype_
<< "' for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
}  // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool async = false;
int batch_size = 1;
if (argc != 3) {
  fprintf(stderr, "Usage: %s <model_name> <image file or folder>\n", argv[0]);
  return -1;
}
std::string model_name = argv[1];
std::string fileName = argv[2];
std::string preprocess_output_filename;
std::string model_version = "";
std::string url("localhost:8000");
ProtocolType protocol = ProtocolType::HTTP;
tc::Headers http_headers;
TritonClient triton_client;
tc::Error err;
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err << std::endl;
exit(1);
}
ModelInfo model_info;
std::string model_metadata;
err = triton_client.http_client_->ModelMetadata(&model_metadata, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model metadata: " << err << std::endl;
}
rapidjson::Document model_metadata_json;
err = tc::ParseJson(&model_metadata_json, model_metadata);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model metadata: " << err
<< std::endl;
}
std::string model_config;
err = triton_client.http_client_->ModelConfig(&model_config, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
ParseModelHttp( model_metadata_json, model_config_json, batch_size, &model_info);
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(fileName.c_str(), &name_stat) != 0) {
std::cerr << "Failed to find '" << fileName << "': " << strerror(errno) << std::endl;
exit(1);
}
if (name_stat.st_mode & S_IFDIR) {
const std::string dirname = fileName;
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(fileName);
}
std::sort(image_filenames.begin(), image_filenames.end());
std::vector<std::vector<uint8_t>> image_data;
for (const auto& fn : image_filenames) {
image_data.emplace_back();
Preprocess(fn, model_info.type1_, model_info.type3_, model_info.input_c_,
cv::Size(model_info.input_w_, model_info.input_h_), &(image_data.back()));
if ((image_data.size() == 1) && !preprocess_output_filename.empty()) {
std::ofstream output_file(preprocess_output_filename);
std::ostream_iterator<uint8_t> output_iterator(output_file);
std::copy(image_data[0].begin(), image_data[0].end(), output_iterator);
}
}
std::vector<int64_t> shape;
shape.push_back(batch_size);
shape.push_back(model_info.input_c_);
shape.push_back(model_info.input_h_);
shape.push_back(model_info.input_w_);
tc::InferInput* input;
err = tc::InferInput::Create(&input, model_info.input_name_, shape, model_info.input_datatype_);
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
err = tc::InferRequestedOutput::Create(&output, model_info.output_name_);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<std::unique_ptr<tc::InferResult>> results;
std::vector<std::vector<std::string>> result_filenames;
size_t image_idx = 0;
size_t done_cnt = 0;
size_t sent_count = 0;
bool last_request = false;
std::mutex mtx;
std::condition_variable cv;
auto callback_func = [&](tc::InferResult* result)
{
{
std::lock_guard<std::mutex> lk(mtx);
results.emplace_back(result);
done_cnt++;
}
cv.notify_all();
};
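// The mutex, condition variable, and callback above would serve an
// asynchronous request path; the loop below issues synchronous Infer()
// calls, so they remain unused in this example.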
while (!last_request) {
err = input_ptr->Reset();
if (!err.IsOk()) {
std::cerr << "failed resetting input: " << err << std::endl;
exit(1);
}
std::vector<std::string> input_filenames;
for (int idx = 0; idx < batch_size; ++idx) {
input_filenames.push_back(image_filenames[image_idx]);
err = input_ptr->AppendRaw(image_data[image_idx]);
if (!err.IsOk()) {
std::cerr << "failed setting input: " << err << std::endl;
exit(1);
}
image_idx = (image_idx + 1) % image_data.size();
if (image_idx == 0) {
last_request = true;
}
}
result_filenames.emplace_back(std::move(input_filenames));
options.request_id_ = std::to_string(sent_count);
double time1 = getTickCount();
tc::InferResult* result;
if (protocol == ProtocolType::HTTP) {
err = triton_client.http_client_->Infer(
&result, options, inputs, outputs, http_headers);
} else {
err = triton_client.grpc_client_->Infer(
&result, options, inputs, outputs, http_headers);
}
if (!err.IsOk()) {
std::cerr << "failed sending synchronous infer request: " << err
<< std::endl;
exit(1);
}
results.emplace_back(result);
double time2 = getTickCount();
double elapsedTime = (time2 - time1) * 1000 / getTickFrequency();
fprintf(stdout, "inference time: %f ms\n", elapsedTime);
sent_count++;
}
for (size_t idx = 0; idx < results.size(); idx++) {
std::cout << "Request " << idx << ", batch size " << batch_size << std::endl;
Postprocess(
std::move(results[idx]), result_filenames[idx], batch_size,
model_info.output_name_, model_info.max_batch_size_ != 0);
}
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
// Union so a single object can hold either an HTTP or a gRPC client. The
// constructor activates the HTTP member with placement new; the destructor is
// intentionally empty because the example exits without destroying the active
// client.
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
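// Runs one inference with the provided inputs/outputs, either through the
// registered system shared-memory regions (use_shared_memory == true) or
// through freshly appended raw data, and checks that OUTPUT0/OUTPUT1 hold the
// element-wise sum and difference of the inputs.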
void
InferAndValidate(
const bool use_shared_memory, TritonClient& triton_client,
const std::string& protocol, const tc::InferOptions& options,
const tc::Headers& http_headers, std::vector<tc::InferInput*>& inputs,
const size_t input_byte_size,
std::vector<tc::InferRequestedOutput*>& outputs,
const size_t output_byte_size, std::vector<int*>& shm_ptrs)
{
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
int32_t* input0_data_ptr;
int32_t* input1_data_ptr;
int32_t* output0_data_ptr;
int32_t* output1_data_ptr;
FAIL_IF_ERR(inputs[0]->Reset(), "unable to reset input 'INPUT0'");
FAIL_IF_ERR(inputs[1]->Reset(), "unable to reset input 'INPUT1'");
if (use_shared_memory) {
input0_data_ptr = shm_ptrs[0];
input1_data_ptr = shm_ptrs[1];
FAIL_IF_ERR(
inputs[0]->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
inputs[1]->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
FAIL_IF_ERR(
outputs[0]->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
outputs[1]->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
} else {
input0_data_ptr = &input0_data[0];
input1_data_ptr = &input1_data[0];
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all twos. We use twos instead
// of ones in input1_data to validate whether inputs were set correctly.
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 2;
}
FAIL_IF_ERR(
inputs[0]->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for 'INPUT0'");
FAIL_IF_ERR(
inputs[1]->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for 'INPUT1'");
FAIL_IF_ERR(
outputs[0]->UnsetSharedMemory(),
"unable to unset shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
outputs[1]->UnsetSharedMemory(),
"unable to unset shared memory for 'OUTPUT1'");
}
std::vector<const tc::InferRequestedOutput*> routputs = {
outputs[0], outputs[1]};
tc::InferResult* results;
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->Infer(
&results, options, inputs, routputs, http_headers),
"unable to run model");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->Infer(
&results, options, inputs, routputs, http_headers),
"unable to run model");
}
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
if (use_shared_memory) {
std::cout << "\n\n======== SHARED_MEMORY ========\n";
output0_data_ptr = shm_ptrs[2];
output1_data_ptr = shm_ptrs[3];
} else {
std::cout << "\n\n======== NO_SHARED_MEMORY ========\n";
// Get pointers to the result returned...
size_t recv_output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data_ptr,
&recv_output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (recv_output0_byte_size != output_byte_size) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< recv_output0_byte_size << std::endl;
exit(1);
}
size_t recv_output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data_ptr,
&recv_output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (recv_output1_byte_size != output_byte_size) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< recv_output1_byte_size << std::endl;
exit(1);
}
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data_ptr[i] << " + " << input1_data_ptr[i] << " = "
<< output0_data_ptr[i] << std::endl;
std::cout << input0_data_ptr[i] << " - " << input1_data_ptr[i] << " = "
<< output1_data_ptr[i] << std::endl;
if ((input0_data_ptr[i] + input1_data_ptr[i]) != output0_data_ptr[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data_ptr[i] - input1_data_ptr[i]) != output1_data_ptr[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
std::cout << "\n======== END ========\n\n";
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
// Tests whether the same InferInput and InferRequestedOutput objects can be
// successfully used repeatedly for different inferences using/not-using
// shared memory.
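// A minimal usage sketch (the binary name below is only an assumption about
// how the build names this example; a running Triton server with the "simple"
// model loaded is expected):
//   ./reuse_infer_objects_client                 # HTTP, localhost:8000
//   ./reuse_infer_objects_client -i grpc         # gRPC, localhost:8001
//   ./reuse_infer_objects_client -v -u <host:port>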
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
bool url_specified = false;
tc::Headers http_headers;
std::string protocol("http");
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:i:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
url_specified = true;
break;
case 'i':
protocol = optarg;
std::transform(
protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
// Create the inference client for the server. From it
// extract and validate that the model meets the requirements for
// image classification.
TritonClient triton_client;
tc::Error err;
if (protocol == "http") {
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
} else if (protocol == "grpc") {
if (!url_specified) {
url = "localhost:8001";
}
err = tc::InferenceServerGrpcClient::Create(
&triton_client.grpc_client_, url, verbose);
} else {
std::cerr
<< "error: unsupported protocol provided: only supports grpc or http."
<< std::endl;
exit(1);
}
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err
<< std::endl;
exit(1);
}
// Unregistering all shared memory regions for a clean
// start.
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
triton_client.http_client_->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
}
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
// integers and Input1 to all ones.
std::string shm_key = "/input_simple";
int shm_fd_ip, *input0_shm;
FAIL_IF_ERR(
tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
"");
FAIL_IF_ERR(
tc::MapSharedMemory(
shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
"");
FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_ip), "");
int* input1_shm = (int*)(input0_shm + 16);
for (size_t i = 0; i < 16; ++i) {
*(input0_shm + i) = i;
*(input1_shm + i) = 1;
}
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
}
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in Shared Memory
shm_key = "/output_simple";
int shm_fd_op;
int* output0_shm;
FAIL_IF_ERR(
tc::CreateSharedMemoryRegion(shm_key, output_byte_size * 2, &shm_fd_op),
"");
FAIL_IF_ERR(
tc::MapSharedMemory(
shm_fd_op, 0, output_byte_size * 2, (void**)&output0_shm),
"");
FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_op), "");
int* output1_shm = (int*)(output0_shm + 16);
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
}
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
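// Pointer order matters: InferAndValidate() reads shm_ptrs[0..3] as
// {INPUT0, INPUT1, OUTPUT0, OUTPUT1} when shared memory is enabled.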
std::vector<int*> shm_ptrs = {
input0_shm, input1_shm, output0_shm, output1_shm};
// The inference settings. Will be using default for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
// Issue inference using shared memory
InferAndValidate(
true /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Issue inference without using shared memory
InferAndValidate(
false /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Issue inference using shared memory
InferAndValidate(
true /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Unregister shared memory
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
}
// Cleanup shared memory
FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
FAIL_IF_ERR(tc::UnmapSharedMemory(output0_shm, output_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/output_simple"), "");
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
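// Checks that the response from the "simple" model is well formed and that
// OUTPUT0/OUTPUT1 hold the element-wise sum and difference of the two inputs.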
void
ValidateResult(
const std::shared_ptr<tc::InferResult> result,
std::vector<int32_t>& input0_data, std::vector<int32_t>& input1_data)
{
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", result);
ValidateShapeAndDatatype("OUTPUT1", result);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (output1_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << result->DebugString() << std::endl;
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
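// This example issues asynchronous requests with AsyncInfer() and waits for
// the callbacks on a condition variable. A minimal invocation sketch (the
// binary name is an assumption; a running Triton server with the "simple"
// model loaded is expected):
//   ./simple_grpc_async_infer_client -u localhost:8001 -v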
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
uint32_t client_timeout = 0;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:t:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
// Create a InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// The inference settings. Will be using default for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
// Send inference request to the inference server.
std::mutex mtx;
std::condition_variable cv;
size_t repeat_cnt = 2;
size_t done_cnt = 0;
for (size_t i = 0; i < repeat_cnt; i++) {
FAIL_IF_ERR(
client->AsyncInfer(
[&, i](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
std::lock_guard<std::mutex> lk(mtx);
std::cout << "Callback no." << i << " is called" << std::endl;
done_cnt++;
if (result_ptr->RequestStatus().IsOk()) {
ValidateResult(result_ptr, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_ptr->RequestStatus() << std::endl;
exit(1);
}
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
}
// Wait until all callbacks are invoked
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() { return done_cnt >= repeat_cnt; });
}
if (done_cnt == repeat_cnt) {
std::cout << "All done" << std::endl;
} else {
std::cerr << "Done cnt: " << done_cnt
<< " does not match repeat cnt: " << repeat_cnt << std::endl;
exit(1);
}
// Send another AsyncInfer whose callback defers the completed request
// to another thread (main thread) to handle
bool callback_invoked = false;
std::shared_ptr<tc::InferResult> result_placeholder;
FAIL_IF_ERR(
client->AsyncInfer(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
// Defer the response retrieval to main thread
std::lock_guard<std::mutex> lk(mtx);
callback_invoked = true;
result_placeholder = std::move(result_ptr);
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
// Ensure callback is completed
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() { return callback_invoked; });
}
// Get deferred response
std::cout << "Getting results from deferred response" << std::endl;
if (result_placeholder->RequestStatus().IsOk()) {
ValidateResult(result_placeholder, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_placeholder->RequestStatus() << std::endl;
exit(1);
}
tc::InferStat infer_stat;
FAIL_IF_ERR(
client->ClientInferStat(&infer_stat),
"unable to get client inference statistics");
std::cout << "completed_request_count " << infer_stat.completed_request_count
<< std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns " << infer_stat.cumulative_send_time_ns
<< std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
std::cout << "PASS : Async Infer" << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cuda_runtime_api.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
#define FAIL_IF_CUDA_ERR(FUNC) \
{ \
const cudaError_t result = FUNC; \
if (result != cudaSuccess) { \
std::cerr << "CUDA exception (line " << __LINE__ \
<< "): " << cudaGetErrorName(result) << " (" \
<< cudaGetErrorString(result) << ")" << std::endl; \
exit(1); \
} \
}
void
CreateCUDAIPCHandle(
cudaIpcMemHandle_t* cuda_handle, void* input_d_ptr, int device_id = 0)
{
// Set the GPU device to the desired GPU
FAIL_IF_CUDA_ERR(cudaSetDevice(device_id));
// Create IPC handle for data on the gpu
FAIL_IF_CUDA_ERR(cudaIpcGetMemHandle(cuda_handle, input_d_ptr));
}
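// The IPC handles produced by CreateCUDAIPCHandle() are passed to
// RegisterCudaSharedMemory() in main() below, letting the server open the same
// device allocations directly. A minimal invocation sketch (binary name
// assumed; requires a GPU and a running Triton server with the "simple"
// model):
//   ./simple_grpc_cuda_shm_client -u localhost:8001 -v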
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
// Create a InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in CUDA Shared Memory. Initialize Input0 to
// unique integers and Input1 to all ones.
int input_data[32];
for (size_t i = 0; i < 16; ++i) {
input_data[i] = i;
input_data[16 + i] = 1;
}
// copy INPUT0 and INPUT1 data in GPU shared memory
int* input_d_ptr;
FAIL_IF_CUDA_ERR(cudaMalloc((void**)&input_d_ptr, input_byte_size * 2));
FAIL_IF_CUDA_ERR(cudaMemcpy(
(void*)input_d_ptr, (void*)input_data, input_byte_size * 2,
cudaMemcpyHostToDevice));
cudaIpcMemHandle_t input_cuda_handle;
CreateCUDAIPCHandle(&input_cuda_handle, (void*)input_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"input_data", input_cuda_handle, 0 /* device_id */,
input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in CUDA Shared Memory
int *output0_d_ptr, *output1_d_ptr;
FAIL_IF_CUDA_ERR(cudaMalloc((void**)&output0_d_ptr, output_byte_size * 2));
output1_d_ptr = (int*)output0_d_ptr + 16;
cudaIpcMemHandle_t output_cuda_handle;
CreateCUDAIPCHandle(&output_cuda_handle, (void*)output0_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"output_data", output_cuda_handle, 0 /* device_id */,
output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
// The inference settings. Will be using default for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Copy input and output data back to the CPU
int output0_data[16], output1_data[16];
FAIL_IF_CUDA_ERR(cudaMemcpy(
output0_data, output0_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
FAIL_IF_CUDA_ERR(cudaMemcpy(
output1_data, output1_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
for (size_t i = 0; i < 16; ++i) {
std::cout << input_data[i] << " + " << input_data[16 + i] << " = "
<< output0_data[i] << std::endl;
std::cout << input_data[i] << " - " << input_data[16 + i] << " = "
<< output1_data[i] << std::endl;
if ((input_data[i] + input_data[16 + i]) != output0_data[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input_data[i] - input_data[16 + i]) != output1_data[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get shared memory regions active/registered within triton
inference::CudaSharedMemoryStatusResponse status;
FAIL_IF_ERR(
client->CudaSharedMemoryStatus(&status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << status.DebugString() << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Free GPU memory
FAIL_IF_CUDA_ERR(cudaFree(input_d_ptr));
FAIL_IF_CUDA_ERR(cudaFree(output0_d_ptr));
std::cout << "PASS : Cuda Shared Memory " << std::endl;
return 0;
}