Commit c68e1835 authored by lijian6

Initial commit
---
BasedOnStyle: Google
IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
#VSCode
/.vscode
src/.vscode
src/c++/.vscode
src/python/.vscode
#C++
/build
*.so
src/c++/perf_analyzer/builddir/
src/c++/perf_analyzer/.vscode/
#Python
__pycache__/
*.pyc
#Other
node_modules
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
exclude: ^src/grpc_generated
repos:
  - repo: https://github.com/timothycrosley/isort
    rev: 5.12.0
    hooks:
      - id: isort
        additional_dependencies: [toml]
  - repo: https://github.com/psf/black
    rev: 23.1.0
    hooks:
      - id: black
        types_or: [python, cython]
  - repo: https://github.com/PyCQA/flake8
    rev: 5.0.4
    hooks:
      - id: flake8
        args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore=E203,E501]
        types_or: [python, cython]
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v16.0.5
    hooks:
      - id: clang-format
        types_or: [c, c++, cuda, proto, textproto, java]
        args: ["-fallback-style=none", "-style=file", "-i"]
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
      - id: codespell
        additional_dependencies: [tomli]
        args: ["--toml", "pyproject.toml"]
        exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
  # More details about these pre-commit hooks here:
  # https://pre-commit.com/hooks.html
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-case-conflict
      - id: check-executables-have-shebangs
      - id: check-merge-conflict
      - id: check-json
      - id: check-toml
      - id: check-yaml
      - id: check-shebang-scripts-are-executable
      - id: end-of-file-fixer
        types_or: [c, c++, cuda, proto, textproto, java, python]
      - id: mixed-line-ending
      - id: requirements-txt-fixer
      - id: trailing-whitespace
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(tritonclient LANGUAGES C CXX)
#
# Options
#
set(TRITON_VERSION "0.0.0" CACHE STRING "Version for the clients")
set(PERF_ANALYZER_VERSION ${TRITON_VERSION} CACHE STRING "Build Version for Perf Analyzer")
option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF)
option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF)
option(TRITON_ENABLE_PYTHON_HTTP "Enable Python HTTP client libraries" OFF)
option(TRITON_ENABLE_PYTHON_GRPC "Enable Python GRPC client libraries" OFF)
option(TRITON_ENABLE_JAVA_HTTP "Enable JAVA HTTP client libraries" OFF)
option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
option(TRITON_ENABLE_ZLIB "Include ZLIB library in build" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/third_party repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
if(NOT TRITON_ENABLE_PYTHON_GRPC)
set(TRITON_COMMON_ENABLE_PROTOBUF_PYTHON OFF)
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-third-party
GIT_REPOSITORY https://github.com/triton-inference-server/third_party.git
GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
GIT_SHALLOW ON
)
set(TRITON_THIRD_PARTY_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/third-party)
FetchContent_MakeAvailable(repo-third-party)
# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on Centos
set (LIB_DIR "lib")
# /etc/os-release does not exist on Windows
if(EXISTS "/etc/os-release")
file(STRINGS /etc/os-release DISTRO REGEX "^NAME=")
string(REGEX REPLACE "NAME=\"(.*)\"" "\\1" DISTRO "${DISTRO}")
message(STATUS "Distro Name: ${DISTRO}")
if(DISTRO MATCHES "CentOS.*")
set (LIB_DIR "lib64")
endif()
endif()
# Need to use ExternalProject for our builds so that we can get the
# correct dependencies between our components and the ExternalProject
# dependencies (found in the third_party repo)
include(ExternalProject)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/cc-clients/install)
else()
set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
endif()
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
if (OPENSSL_ROOT_DIR)
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
endif()
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
if (CMAKE_TOOLCHAIN_FILE)
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
endif()
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
if (VCPKG_TARGET_TRIPLET)
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
endif()
# Location where protobuf-config.cmake will be installed varies by
# platform
if (WIN32)
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
else()
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf")
endif()
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_PERF_ANALYZER_C_API)
set(_cc_client_depends "")
if(${TRITON_ENABLE_CC_HTTP})
set(_cc_client_depends ${_cc_client_depends} curl)
endif() # TRITON_ENABLE_CC_HTTP
if(${TRITON_ENABLE_CC_GRPC} OR ${TRITON_ENABLE_PERF_ANALYZER})
set(_cc_client_depends ${_cc_client_depends} grpc protobuf)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_C_API})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_C_API=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_C_API
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TFS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TFS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TFS
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS
ExternalProject_Add(cc-clients
PREFIX cc-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/c++"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/cc-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DCURL_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
-DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
-DPERF_ANALYZER_VERSION:STRING=${PERF_ANALYZER_VERSION}
-DTRITON_ENABLE_CC_HTTP:BOOL=${TRITON_ENABLE_CC_HTTP}
-DTRITON_ENABLE_CC_GRPC:BOOL=${TRITON_ENABLE_CC_GRPC}
-DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER}
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DTRITON_ENABLE_ZLIB:BOOL=${TRITON_ENABLE_ZLIB}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
DEPENDS ${_cc_client_depends}
)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC)
set(_py_client_depends "")
if(${TRITON_ENABLE_PYTHON_GRPC})
set(_py_client_depends ${_py_client_depends} grpc protobuf)
endif() # TRITON_ENABLE_PYTHON_GRPC
if(${TRITON_ENABLE_PERF_ANALYZER})
set(_py_client_depends ${_py_client_depends} cc-clients)
endif() # TRITON_ENABLE_PERF_ANALYZER
ExternalProject_Add(python-clients
PREFIX python-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/python"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/python-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
-DTRITON_ENABLE_PYTHON_HTTP:BOOL=${TRITON_ENABLE_PYTHON_HTTP}
-DTRITON_ENABLE_PYTHON_GRPC:BOOL=${TRITON_ENABLE_PYTHON_GRPC}
-DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER}
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
DEPENDS ${_py_client_depends}
)
endif() # TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC
if(TRITON_ENABLE_JAVA_HTTP)
ExternalProject_Add(java-clients
PREFIX java-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/java"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/java-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
-DTRITON_ENABLE_JAVA_HTTP:BOOL=${TRITON_ENABLE_JAVA_HTTP}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
INSTALL_COMMAND ""
)
endif() # TRITON_ENABLE_JAVA_HTTP
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
<!--
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
# Triton Client Libraries and Examples
To simplify communication with Triton, the Triton project provides
several client libraries and examples of how to use those
libraries. Ask questions or report problems in the main Triton [issues
page](https://github.com/triton-inference-server/server/issues).
The provided client libraries are:
* [C++ and Python APIs](#client-library-apis) that make it easy to
communicate with Triton from your C++ or Python application. Using
these libraries you can send either HTTP/REST or GRPC requests to
Triton to access all its capabilities: inferencing, status and
health, statistics and metrics, model repository management,
etc. These libraries also support using system and CUDA shared
memory for passing inputs to and receiving outputs from Triton.
* [Java API](#client-library-apis) (contributed by Alibaba Cloud PAI Team)
that makes it easy to communicate with Triton from your Java application
using HTTP/REST requests. For now, only a limited feature subset is supported.
* The [protoc
compiler](https://developers.google.com/protocol-buffers/docs/tutorials)
can generate a GRPC API in a large number of programming
languages.
* See [src/grpc_generated/go](src/grpc_generated/go) for an example for the
[Go programming language](https://golang.org/).
* See [src/grpc_generated/java](src/grpc_generated/java) for an example for
the Java and Scala programming languages.
* See [src/grpc_generated/javascript](src/grpc_generated/javascript) for
an example with the JavaScript programming language.
There are also many example applications that show how to use these
libraries. Many of these examples use models from the [example model
repository](https://github.com/triton-inference-server/server/blob/main/docs/getting_started/quickstart.md#create-a-model-repository).
* C++ and Python versions of *image_client*, an example application
that uses the C++ or Python client library to execute image
classification models on Triton. See [Image Classification
Example](#image-classification-example).
* Several simple [C++ examples](src/c%2B%2B/examples) show
how to use the C++ library to communicate with Triton to perform
inferencing and other tasks. The C++ examples demonstrating the
HTTP/REST client are named with a *simple_http_* prefix and the
examples demonstrating the GRPC client are named with a
*simple_grpc_* prefix. See [Simple Example
Applications](#simple-example-applications).
* Several simple [Python examples](src/python/examples)
show how to use the Python library to communicate with Triton to
perform inferencing and other tasks. The Python examples
demonstrating the HTTP/REST client are named with a *simple_http_*
prefix and the examples demonstrating the GRPC client are named with
a *simple_grpc_* prefix. See [Simple Example
Applications](#simple-example-applications).
* Several simple [Java
examples](src/java/src/main/java/triton/client/examples) show how to
use the Java API to communicate with Triton to perform inferencing
and other tasks.
* A couple of [Python examples that communicate with Triton using a
Python GRPC API](src/python/examples) generated by the
[protoc compiler](https://grpc.io/docs/guides/). *grpc_client.py* is
a simple example that shows simple API
usage. *grpc_image_client.py* is functionally equivalent to
*image_client* but uses a generated GRPC client stub to
communicate with Triton.
## Getting the Client Libraries And Examples
The easiest way to get the Python client library is to [use pip to
install the tritonclient
module](#download-using-python-package-installer-pip). You can also
download the C++, Python and Java client libraries from [Triton GitHub
release](#download-from-github), or [download a pre-built Docker image
containing the client libraries](#download-docker-image-from-ngc) from
[NVIDIA GPU Cloud (NGC)](https://ngc.nvidia.com).
It is also possible to build the client libraries with
[cmake](#build-using-cmake).
### Download Using Python Package Installer (pip)
The GRPC and HTTP client libraries are available as a Python package
that can be installed using a recent version of pip.
```
$ pip install tritonclient[all]
```
Using *all* installs both the HTTP/REST and GRPC client
libraries. Two optional packages, *grpc* and *http*, can be used to
install support for only the corresponding protocol. For example, to
install only the HTTP/REST client library use:
```
$ pip install tritonclient[http]
```
The components of the install packages are:
* http
* grpc [ `service_pb2`, `service_pb2_grpc`, `model_config_pb2` ]
* utils [the Linux distribution also includes `shared_memory` and `cuda_shared_memory`]
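As a quick check after installing, a minimal sketch like the one below
(assuming a Triton server is reachable at `localhost:8000`) creates an HTTP
client and queries the server's health and metadata:

```python
import tritonclient.http as httpclient

# Connect to a Triton server; adjust the URL for your deployment.
client = httpclient.InferenceServerClient(url="localhost:8000")

# Basic health and metadata queries exposed by the HTTP/REST API.
print("live:", client.is_server_live())
print("ready:", client.is_server_ready())
print("metadata:", client.get_server_metadata())
```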
The Linux version of the package also includes the
[perf_analyzer](src/c++/perf_analyzer/README.md)
binary. The perf_analyzer binary is built on Ubuntu 20.04 and may not
run on other Linux distributions. To run perf_analyzer, the
following dependency must be installed:
```bash
$ sudo apt update
$ sudo apt install libb64-dev
```
To reiterate, the installation on Windows will not include perf_analyzer
or the shared_memory/cuda_shared_memory components.
### Download From GitHub
The client libraries and the perf_analyzer executable can be
downloaded from the [Triton GitHub release
page](https://github.com/triton-inference-server/server/releases)
corresponding to the release you are interested in. The client
libraries are found in the "Assets" section of the release page in a
tar file named after the version of the release and the OS, for
example, v2.3.0_ubuntu2004.clients.tar.gz.
The pre-built libraries can be used on the corresponding host system
or you can install them into the Triton container to have both the
clients and server in the same container.
```bash
$ mkdir clients
$ cd clients
$ wget https://github.com/triton-inference-server/server/releases/download/<tarfile_path>
$ tar xzf <tarfile_name>
```
After installing, the libraries can be found in lib/, the headers in
include/, the Python wheel files in python/, and the jar files in
java/. The bin/ and python/ directories contain the built examples
that you can learn more about below.
The perf_analyzer binary is built on Ubuntu 20.04 and may not run on
other Linux distributions. To use the C++ libraries or perf_analyzer
executable you must install some dependencies.
```bash
$ apt-get update
$ apt-get install curl libcurl4-openssl-dev libb64-dev
```
### Download Docker Image From NGC
A Docker image containing the client libraries and examples is
available from [NVIDIA GPU Cloud
(NGC)](https://ngc.nvidia.com). Before attempting to pull the
container ensure you have access to NGC. For step-by-step
instructions, see the [NGC Getting Started
Guide](http://docs.nvidia.com/ngc/ngc-getting-started-guide/index.html).
Use docker pull to get the client libraries and examples container
from NGC.
```bash
$ docker pull nvcr.io/nvidia/tritonserver:<xx.yy>-py3-sdk
```
Where \<xx.yy\> is the version that you want to pull. Within the
container the client libraries are in /workspace/install/lib, the
corresponding headers in /workspace/install/include, and the Python
wheel files in /workspace/install/python. The image will also contain
the built client examples.
**Important Note:** When running either the server or the client using
Docker containers and using the
[CUDA shared memory feature](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_shared_memory.md#cuda-shared-memory)
you need to add the `--pid host` flag when launching the containers. The reason is
that CUDA IPC APIs require the PIDs of the source and destination of the exported
pointer to be different. Otherwise, Docker isolates each container in its own PID
namespace, which may cause the source and destination PIDs to be equal. The error is
always observed when both containers are started in non-interactive mode.
### Build Using CMake
The client library build is performed using CMake. To build the client
libraries and examples with all features, first change directory to
the root of this repo and check out the release version of the branch
that you want to build (or the *main* branch if you want to build the
under-development version).
```bash
$ git checkout main
```
If building the Java client you must first install Maven and a JDK
appropriate for your OS. For example, for Ubuntu you should install
the `default-jdk` package:
```
$ apt-get install default-jdk maven
```
Building on Windows vs. non-Windows requires different invocations
because Triton on Windows does not yet support all the build options.
#### Non-Windows
Use *cmake* to configure the build. Adjust the flags depending on which
components of the Triton client you want to build. For example, to build
Perf Analyzer with the Triton C API, use
`-DTRITON_ENABLE_PERF_ANALYZER=ON -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON`. You can
also use the `TRITON_ENABLE_PERF_ANALYZER_TFS` and `TRITON_ENABLE_PERF_ANALYZER_TS` flags
to enable or disable support for the TensorFlow Serving and TorchServe backends, respectively, in Perf Analyzer.
The following command demonstrates how to build the client with all features:
```
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX=`pwd`/install -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON -DTRITON_ENABLE_PERF_ANALYZER=ON -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON -DTRITON_ENABLE_PERF_ANALYZER_TS=ON -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON -DTRITON_ENABLE_JAVA_HTTP=ON -DTRITON_ENABLE_GPU=ON -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON ..
```
If you are building on a release branch (or on a development branch
that is based off of a release branch), then you must also use
additional cmake arguments to point to that release branch for repos
that the client build depends on. For example, if you are building the
r21.10 client branch then you need to use the following additional
cmake flags:
```
-DTRITON_COMMON_REPO_TAG=r21.10
-DTRITON_THIRD_PARTY_REPO_TAG=r21.10
-DTRITON_CORE_REPO_TAG=r21.10
```
Then use *make* to build the clients and examples.
```
$ make cc-clients python-clients java-clients
```
When the build completes the libraries and examples can be found in
the install directory.
#### Windows
To build the clients you must install an appropriate C++ compiler and
other dependencies required for the build. The easiest way to do this
is to create the [Windows min Docker
image](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md#windows-10-min-container)
and then perform the build within a container launched from that image.
```
> docker run -it --rm win10-py3-min powershell
```
It is not necessary to use Docker or the win10-py3-min container for
the build, but if you do not, you must install the appropriate
dependencies onto your host system.
Next use *cmake* to configure the build. If you are not building
within the win10-py3-min container then you will likely need to adjust
the CMAKE_TOOLCHAIN_FILE location in the following command.
```
$ mkdir build
$ cd build
$ cmake -DVCPKG_TARGET_TRIPLET=x64-windows -DCMAKE_TOOLCHAIN_FILE='/vcpkg/scripts/buildsystems/vcpkg.cmake' -DCMAKE_INSTALL_PREFIX=install -DTRITON_ENABLE_CC_GRPC=ON -DTRITON_ENABLE_PYTHON_GRPC=ON -DTRITON_ENABLE_GPU=OFF -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON ..
```
If you are building on a release branch (or on a development branch
that is based off of a release branch), then you must also use
additional cmake arguments to point to that release branch for repos
that the client build depends on. For example, if you are building the
r21.10 client branch then you need to use the following additional
cmake flags:
```
-DTRITON_COMMON_REPO_TAG=r21.10
-DTRITON_THIRD_PARTY_REPO_TAG=r21.10
-DTRITON_CORE_REPO_TAG=r21.10
```
Then use msbuild.exe to build.
```
$ msbuild.exe cc-clients.vcxproj -p:Configuration=Release -clp:ErrorsOnly
$ msbuild.exe python-clients.vcxproj -p:Configuration=Release -clp:ErrorsOnly
```
When the build completes the libraries and examples can be found in
the install directory.
## Client Library APIs
The C++ client API exposes a class-based interface. The commented
interface is available in
[grpc_client.h](src/c%2B%2B/library/grpc_client.h),
[http_client.h](src/c%2B%2B/library/http_client.h), and
[common.h](src/c%2B%2B/library/common.h).
The Python client API provides similar capabilities as the C++
API. The commented interface is available in
[grpc](src/python/library/tritonclient/grpc/__init__.py)
and
[http](src/python/library/tritonclient/http/__init__.py).
The Java client API provides similar capabilities as the Python API
with similar classes and methods. For more information please refer
to the [Java client directory](src/java).
### HTTP Options
#### SSL/TLS
The client library allows communication across a secured channel using the HTTPS protocol. Setting these SSL options alone does not ensure secure communication; the Triton server should be running behind an `https://` proxy such as nginx. The client can then establish a secure channel to the proxy. The [`qa/L0_https`](https://github.com/triton-inference-server/server/blob/main/qa/L0_https/test.sh) test in the server repository demonstrates how this can be achieved.
For the C++ client, see the `HttpSslOptions` struct that encapsulates these options in [http_client.h](src/c%2B%2B/library/http_client.h).
For the Python client, look for the following options in [http/\_\_init\_\_.py](src/python/library/tritonclient/http/__init__.py):
* ssl
* ssl_options
* ssl_context_factory
* insecure
The [C++](src/c%2B%2B/examples/simple_http_infer_client.cc) and [Python](src/python/examples/simple_http_infer_client.py) examples
demonstrate how to use SSL/TLS settings on the client side.
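For the Python HTTP client, a minimal sketch of these options is shown below,
assuming an `https://` proxy such as nginx is listening on port 443 in front of
Triton; the option names are the ones listed above:

```python
import tritonclient.http as httpclient

# The client talks HTTPS to the proxy, which forwards requests to Triton.
client = httpclient.InferenceServerClient(
    url="localhost:443",
    ssl=True,        # use an SSL/TLS channel to the proxy
    insecure=False,  # verify the proxy's certificate
)
print(client.is_server_live())
```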
#### Compression
The client library enables on-wire compression for HTTP transactions.
For the C++ client, see the `request_compression_algorithm` and `response_compression_algorithm` parameters in the `Infer` and `AsyncInfer` functions in [http_client.h](src/c%2B%2B/library/http_client.h). By default, the parameter is set to `CompressionType::NONE`.
Similarly, for the Python client, see the `request_compression_algorithm` and `response_compression_algorithm` parameters in the `infer` and `async_infer` functions in [http/\_\_init\_\_.py](src/python/library/tritonclient/http/__init__.py).
The [C++](src/c%2B%2B/examples/simple_http_infer_client.cc) and [Python](src/python/examples/simple_http_infer_client.py) examples demonstrate how to use the compression options.
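For example, with the Python HTTP client the compression parameters named above
are passed per request. The sketch below assumes a hypothetical model named
`simple` with a 1x16 INT32 input called `INPUT0`:

```python
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# Hypothetical model and tensor names, used only for illustration.
inputs = [httpclient.InferInput("INPUT0", [1, 16], "INT32")]
inputs[0].set_data_from_numpy(np.zeros((1, 16), dtype=np.int32))

# Compress both the request body and the response body on the wire.
result = client.infer(
    "simple",
    inputs,
    request_compression_algorithm="gzip",
    response_compression_algorithm="gzip",
)
```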
#### Python AsyncIO Support (Beta)
*This feature is currently in beta and may be subject to change.*
Advanced users may call the Python client via `async` and `await` syntax. The
[infer](src/python/examples/simple_http_aio_infer_client.py) example
demonstrates how to infer with AsyncIO.
If using SSL/TLS with AsyncIO, look for the `ssl` and `ssl_context` options in
[http/aio/\_\_init\_\_.py](src/python/library/tritonclient/http/aio/__init__.py).
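A minimal AsyncIO sketch, assuming a server at `localhost:8000`, might look
like the following:

```python
import asyncio

import tritonclient.http.aio as aio_httpclient


async def main():
    # The AsyncIO client mirrors the synchronous API, but every call is
    # awaitable and the client should be closed explicitly.
    client = aio_httpclient.InferenceServerClient(url="localhost:8000")
    try:
        print("live:", await client.is_server_live())
    finally:
        await client.close()


asyncio.run(main())
```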
#### Python Client Plugin API (Beta)
*This feature is currently in beta and may be subject to change.*
The Triton Client Plugin API lets you register custom plugins to add or modify
request headers. This is useful if you have a gateway in front of Triton Server
that requires extra headers for each request, such as HTTP Authorization. By
registering the plugin, your gateway will work with Python clients without
additional configuration. Note that Triton Server does not implement
authentication or authorization mechanisms and similarly,
Triton Server is not the direct consumer of the additional headers.
The plugin must implement the `__call__` method. The signature
of the `__call__` method should look like the following:
```python
class MyPlugin:
def __call__(self, request):
"""This method will be called for every HTTP request. Currently, the only
field that can be accessed by the request object is the `request.headers`
field. This field must be updated in-place.
"""
request.headers['my-header-key'] = 'my-header-value'
```
After the plugin implementation is complete, you can register the
plugin by calling `register_plugin` on the `InferenceServerClient` object.
```python
from tritonclient.http import InferenceServerClient
client = InferenceServerClient(...)
# Register the plugin
my_plugin = MyPlugin()
client.register_plugin(my_plugin)
# All the method calls will update the headers according to the plugin
# implementation.
client.infer(...)
```
To unregister the plugin, you can call the `client.unregister_plugin()`
function.
##### Basic Auth
You can register the `BasicAuth` plugin that implements
[Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
```python
from tritonclient.grpc.auth import BasicAuth
from tritonclient.grpc import InferenceServerClient
basic_auth = BasicAuth('username', 'password')
client = InferenceServerClient('...')
client.register_plugin(basic_auth)
```
The example above shows how to register the plugin for
gRPC client. The `BasicAuth` plugin can be registered
similarly for HTTP and
[AsyncIO](#python-asyncio-support-beta)
clients.
### GRPC Options
#### SSL/TLS
The client library allows communication across a secured channel using the gRPC protocol.
For the C++ client, see the `SslOptions` struct that encapsulates these options in [grpc_client.h](src/c%2B%2B/library/grpc_client.h).
For the Python client, look for the following options in [grpc/\_\_init\_\_.py](src/python/library/tritonclient/grpc/__init__.py):
* ssl
* root_certificates
* private_key
* certificate_chain
The [C++](src/c%2B%2B/examples/simple_grpc_infer_client.cc) and [Python](src/python/examples/simple_grpc_infer_client.py) examples
demonstrate how to use SSL/TLS settings on the client side. For information on the corresponding server-side parameters, refer to the
[server documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#ssltls).
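For the Python client these options map to constructor arguments; the sketch
below is illustrative only, and the certificate file paths are placeholders:

```python
import tritonclient.grpc as grpcclient

# PEM file paths are placeholders; the option names are the ones listed
# above and are documented in grpc/__init__.py.
client = grpcclient.InferenceServerClient(
    url="localhost:8001",
    ssl=True,
    root_certificates="ca.crt",
    private_key="client.key",
    certificate_chain="client.crt",
)
print(client.is_server_live())
```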
#### Compression
The client library also exposes options to use on-wire compression for gRPC transactions.
For the C++ client, see the `compression_algorithm` parameter in the `Infer`, `AsyncInfer` and `StartStream` functions in [grpc_client.h](src/c%2B%2B/library/grpc_client.h). By default, the parameter is set to `GRPC_COMPRESS_NONE`.
Similarly, for the Python client, see the `compression_algorithm` parameter in the `infer`, `async_infer` and `start_stream` functions in [grpc/\_\_init\_\_.py](src/python/library/tritonclient/grpc/__init__.py).
The [C++](src/c%2B%2B/examples/simple_grpc_infer_client.cc) and [Python](src/python/examples/simple_grpc_infer_client.py) examples demonstrate how to configure compression for clients. For information on the corresponding server-side parameters, refer to the [server documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#compression).
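For example, with the Python gRPC client the `compression_algorithm` parameter
is passed per call; the sketch below uses a hypothetical model named `simple`
with a 1x16 INT32 input called `INPUT0`:

```python
import numpy as np
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")

# Hypothetical model and tensor names, used only for illustration.
inputs = [grpcclient.InferInput("INPUT0", [1, 16], "INT32")]
inputs[0].set_data_from_numpy(np.zeros((1, 16), dtype=np.int32))

# Ask the gRPC channel to compress this call with gzip.
result = client.infer("simple", inputs, compression_algorithm="gzip")
```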
#### GRPC KeepAlive
Triton exposes GRPC KeepAlive parameters with the default values for both
client and server described [here](https://github.com/grpc/grpc/blob/master/doc/keepalive.md).
You can find a `KeepAliveOptions` struct/class that encapsulates these
parameters in both the [C++](src/c%2B%2B/library/grpc_client.h) and
[Python](src/python/library/tritonclient/grpc/__init__.py) client libraries.
There is also a [C++](src/c%2B%2B/examples/simple_grpc_keepalive_client.cc) and
[Python](src/python/examples/simple_grpc_keepalive_client.py) example
demonstrating how to set up these parameters on the client side. For information
on the corresponding server-side parameters, refer to the
[server documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#grpc-keepalive).
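With the Python client, a `KeepAliveOptions` object is passed when the client
is constructed; the values in the sketch below are illustrative only, and the
field names follow the gRPC keepalive parameters described in the link above:

```python
import tritonclient.grpc as grpcclient

# Illustrative values; the defaults follow the gRPC keepalive documentation.
keepalive_options = grpcclient.KeepAliveOptions(
    keepalive_time_ms=2**31 - 1,
    keepalive_timeout_ms=20000,
    keepalive_permit_without_calls=False,
    http2_max_pings_without_data=2,
)

client = grpcclient.InferenceServerClient(
    url="localhost:8001",
    keepalive_options=keepalive_options,
)
print(client.is_server_live())
```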
#### Custom GRPC Channel Arguments
Advanced users may require specific client-side GRPC Channel Arguments that are
not currently exposed by Triton through direct means. To support this, Triton
allows users to pass custom channel arguments upon creating a GRPC client. When
using this option, it is up to the user to pass a valid combination of arguments
for their use case; Triton cannot feasibly test every possible combination of
channel arguments.
There is a [C++](src/c%2B%2B/examples/simple_grpc_custom_args_client.cc) and
[Python](src/python/examples/simple_grpc_custom_args_client.py) example
demonstrating how to construct and pass these custom arguments upon creating
a GRPC client.
You can find a comprehensive list of possible GRPC Channel Arguments
[here](https://grpc.github.io/grpc/core/group__grpc__arg__keys.html).
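With the Python gRPC client the custom arguments are passed as a list of
key/value tuples when the client is created; the sketch below picks two
arbitrary channel arguments purely for illustration:

```python
import tritonclient.grpc as grpcclient

# Example gRPC channel arguments; see the list of keys linked above.
channel_args = [
    ("grpc.lb_policy_name", "round_robin"),
    ("grpc.max_receive_message_length", 64 * 1024 * 1024),
]

client = grpcclient.InferenceServerClient(
    url="localhost:8001",
    channel_args=channel_args,
)
print(client.is_server_live())
```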
#### Python AsyncIO Support (Beta)
*This feature is currently in beta and may be subject to change.*
Advanced users may call the Python client via `async` and `await` syntax. The
[infer](src/python/examples/simple_grpc_aio_infer_client.py) and
[stream](src/python/examples/simple_grpc_aio_sequence_stream_infer_client.py)
examples demonstrate how to infer with AsyncIO.
## Simple Example Applications
This section describes several of the simple example applications and
the features that they illustrate.
### Bytes/String Datatype
Some frameworks support tensors where each element in the tensor is
variable-length binary data. Each element can hold a string or an
arbitrary sequence of bytes. On the client this datatype is BYTES (see
[Datatypes](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#datatypes)
for information on supported datatypes).
The Python client library uses numpy to represent input and output
tensors. For BYTES tensors the dtype of the numpy array should be
'np.object_' as shown in the examples. For backwards compatibility
with previous versions of the client library, 'np.bytes_' can also be
used for BYTES tensors. However, using 'np.bytes_' is not recommended
because using this dtype will cause numpy to remove all trailing zeros
from each array element. As a result, binary sequences ending in
zero(s) will not be represented correctly.
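For example, a BYTES input can be constructed with the Python HTTP client as
sketched below; the model name `string_model` and tensor name `INPUT0` are
hypothetical:

```python
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

# Variable-length byte strings must use the np.object_ dtype.
data = np.array([b"hello", b"\x00\x01\x02\x00"], dtype=np.object_)

inputs = [
    httpclient.InferInput("INPUT0", list(data.shape), np_to_triton_dtype(data.dtype))
]
inputs[0].set_data_from_numpy(data)

client = httpclient.InferenceServerClient(url="localhost:8000")
result = client.infer("string_model", inputs)  # hypothetical model name
```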
BYTES tensors are demonstrated in the C++ example applications
simple_http_string_infer_client.cc and
simple_grpc_string_infer_client.cc. String tensors are demonstrated
in the Python example applications simple_http_string_infer_client.py
and simple_grpc_string_infer_client.py.
### System Shared Memory
Using system shared memory to communicate tensors between the client
library and Triton can significantly improve performance in some
cases.
Using system shared memory is demonstrated in the C++ example
applications simple_http_shm_client.cc and simple_grpc_shm_client.cc.
Using system shared memory is demonstrated in the Python example
applications simple_http_shm_client.py and simple_grpc_shm_client.py.
Python does not have a standard way of allocating and accessing shared
memory, so a simple [system shared memory
module](src/python/library/tritonclient/utils/shared_memory)
is provided as an example; it can be used with the Python client library to
create, set, and destroy system shared memory.
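A condensed sketch of this workflow with the Python HTTP client and the system
shared memory module is shown below; the region key, region name, and model
name are illustrative only:

```python
import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.shared_memory as shm

client = httpclient.InferenceServerClient(url="localhost:8000")

# Create a system shared memory region and copy the input data into it.
input_data = np.arange(16, dtype=np.int32)
byte_size = input_data.size * input_data.itemsize
shm_handle = shm.create_shared_memory_region("input_data", "/input_simple", byte_size)
shm.set_shared_memory_region(shm_handle, [input_data])

# Tell Triton about the region, then reference it from the input.
client.register_system_shared_memory("input_data", "/input_simple", byte_size)
inputs = [httpclient.InferInput("INPUT0", [1, 16], "INT32")]
inputs[0].set_shared_memory("input_data", byte_size)

result = client.infer("simple", inputs)  # illustrative model name

# Clean up the region on both the server and the client.
client.unregister_system_shared_memory("input_data")
shm.destroy_shared_memory_region(shm_handle)
```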
### CUDA Shared Memory
Using CUDA shared memory to communicate tensors between the client
library and Triton can significantly improve performance in some
cases.
Using CUDA shared memory is demonstrated in the C++ example
applications simple_http_cudashm_client.cc and
simple_grpc_cudashm_client.cc. Using CUDA shared memory is
demonstrated in the Python example applications
simple_http_cudashm_client.py and simple_grpc_cudashm_client.py.
Python does not have a standard way of allocating and accessing shared
memory, so a simple [CUDA shared memory
module](src/python/library/tritonclient/utils/cuda_shared_memory)
is provided as an example; it can be used with the Python client library to
create, set, and destroy CUDA shared memory. The module currently supports
numpy arrays ([example usage](src/python/examples/simple_http_cudashm_client.py))
and DLPack tensors ([example usage](src/python/library/tests/test_dlpack.py)).
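The CUDA shared memory workflow is similar; the condensed sketch below assumes
a GPU is available, and the region and model names are again illustrative only:

```python
import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.cuda_shared_memory as cudashm

client = httpclient.InferenceServerClient(url="localhost:8000")

# Create a CUDA shared memory region on GPU 0 and copy the input into it.
input_data = np.arange(16, dtype=np.int32)
byte_size = input_data.size * input_data.itemsize
cuda_handle = cudashm.create_shared_memory_region("input_data", byte_size, 0)
cudashm.set_shared_memory_region(cuda_handle, [input_data])

# Register the region with Triton and reference it from the input.
client.register_cuda_shared_memory(
    "input_data", cudashm.get_raw_handle(cuda_handle), 0, byte_size)
inputs = [httpclient.InferInput("INPUT0", [1, 16], "INT32")]
inputs[0].set_shared_memory("input_data", byte_size)

result = client.infer("simple", inputs)  # illustrative model name

client.unregister_cuda_shared_memory("input_data")
cudashm.destroy_shared_memory_region(cuda_handle)
```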
### Client API for Stateful Models
When performing inference using a [stateful
model](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/architecture.md#stateful-models),
a client must identify which inference requests belong to the same
sequence and also when a sequence starts and ends.
Each sequence is identified with a sequence ID that is provided when
an inference request is made. It is up to the clients to create a
unique sequence ID. For each sequence the first inference request
should be marked as the start of the sequence and the last inference
request should be marked as the end of the sequence.
The use of the sequence ID and the start and end flags is demonstrated in the
C++ example applications simple_http_sequence_stream_infer_client.cc
and simple_grpc_sequence_stream_infer_client.cc, and in the Python example
applications simple_http_sequence_stream_infer_client.py and
simple_grpc_sequence_stream_infer_client.py.
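With the Python gRPC client, the sequence ID and the start and end flags are
passed to each `infer` call; the sketch below uses a hypothetical stateful
model named `simple_sequence` with a single INT32 input called `INPUT`:

```python
import numpy as np
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")
sequence_id = 1000  # illustrative; must be unique per active sequence

values = [1, 2, 3]  # illustrative per-request inputs for the sequence
for i, value in enumerate(values):
    inputs = [grpcclient.InferInput("INPUT", [1, 1], "INT32")]
    inputs[0].set_data_from_numpy(np.array([[value]], dtype=np.int32))
    # Mark the first request as the sequence start and the last as the end.
    client.infer(
        "simple_sequence",
        inputs,
        sequence_id=sequence_id,
        sequence_start=(i == 0),
        sequence_end=(i == len(values) - 1),
    )
```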
## Image Classification Example
The image classification example that uses the C++ client API is
available at
[src/c++/examples/image_client.cc](src/c%2B%2B/examples/image_client.cc). The
Python version of the image classification client is available at
[src/python/examples/image_client.py](src/python/examples/image_client.py).
To use image_client (or image_client.py) you must first have a running
Triton that is serving one or more image classification models. The
image_client application requires that the model have a single image
input and produce a single classification output. If you don't have a
model repository with image classification models see
[QuickStart](https://github.com/triton-inference-server/server/blob/main/docs/getting_started/quickstart.md)
for instructions on how to create one.
Once Triton is running you can use the image_client application to
send inference requests. You can specify a single image or a directory
holding images. Here we send a request for the inception_graphdef
model for an image from the
[qa/images](https://github.com/triton-inference-server/server/tree/main/qa/images) directory.
```bash
$ image_client -m inception_graphdef -s INCEPTION qa/images/mug.jpg
Request 0, batch size 1
Image 'qa/images/mug.jpg':
0.754130 (505) = COFFEE MUG
```
The Python version of the application accepts the same command-line
arguments.
```bash
$ python image_client.py -m inception_graphdef -s INCEPTION qa/images/mug.jpg
Request 0, batch size 1
Image 'qa/images/mug.jpg':
0.826384 (505) = COFFEE MUG
```
The image_client and image_client.py applications use the client
libraries to talk to Triton. By default image_client instructs the
client library to use HTTP/REST protocol, but you can use the GRPC
protocol by providing the -i flag. You must also use the -u flag to
point at the GRPC endpoint on Triton.
```bash
$ image_client -i grpc -u localhost:8001 -m inception_graphdef -s INCEPTION qa/images/mug.jpg
Request 0, batch size 1
Image 'qa/images/mug.jpg':
0.754130 (505) = COFFEE MUG
```
By default the client prints the most probable classification for the
image. Use the -c flag to see more classifications.
```bash
$ image_client -m inception_graphdef -s INCEPTION -c 3 qa/images/mug.jpg
Request 0, batch size 1
Image 'qa/images/mug.jpg':
0.754130 (505) = COFFEE MUG
0.157077 (969) = CUP
0.002880 (968) = ESPRESSO
```
The -b flag allows you to send a batch of images for inferencing.
The image_client application will form the batch from the image or
images that you specified. If the batch is bigger than the number of
images then image_client will just repeat the images to fill the
batch.
```bash
$ image_client -m inception_graphdef -s INCEPTION -c 3 -b 2 qa/images/mug.jpg
Request 0, batch size 2
Image 'qa/images/mug.jpg':
0.754130 (505) = COFFEE MUG
0.157077 (969) = CUP
0.002880 (968) = ESPRESSO
Image 'qa/images/mug.jpg':
0.754130 (505) = COFFEE MUG
0.157077 (969) = CUP
0.002880 (968) = ESPRESSO
```
Provide a directory instead of a single image to perform inferencing
on all images in the directory.
```
$ image_client -m inception_graphdef -s INCEPTION -c 3 -b 2 qa/images
Request 0, batch size 2
Image '/opt/tritonserver/qa/images/car.jpg':
0.819196 (818) = SPORTS CAR
0.033457 (437) = BEACH WAGON
0.031232 (480) = CAR WHEEL
Image '/opt/tritonserver/qa/images/mug.jpg':
0.754130 (505) = COFFEE MUG
0.157077 (969) = CUP
0.002880 (968) = ESPRESSO
Request 1, batch size 2
Image '/opt/tritonserver/qa/images/vulture.jpeg':
0.977632 (24) = VULTURE
0.000613 (9) = HEN
0.000560 (137) = EUROPEAN GALLINULE
Image '/opt/tritonserver/qa/images/car.jpg':
0.819196 (818) = SPORTS CAR
0.033457 (437) = BEACH WAGON
0.031232 (480) = CAR WHEEL
```
The [grpc_image_client.py](src/python/examples/grpc_image_client.py)
application behaves the same as the image_client except that instead
of using the client library it uses the GRPC generated library to
communicate with Triton.
## Ensemble Image Classification Example Application
In comparison to the image classification example above, this example
uses an ensemble of an image-preprocessing model implemented as a
[DALI
backend](https://github.com/triton-inference-server/dali_backend) and
a TensorFlow Inception model. The ensemble model allows you to send
the raw image binaries in the request and receive classification
results without preprocessing the images on the client.
To try this example you should follow the [DALI ensemble example
instructions](https://github.com/triton-inference-server/dali_backend/tree/main/docs/examples/inception_ensemble).
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
[tool.codespell]
# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
# this is only to allow you to run codespell interactively
# this also overrides the grpc_generated folder, since it is generated
skip = "./.git,./.github,./src/grpc_generated"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# ignore allowed words
# ignoring atleast to avoid testing::AtLeast from getting flagged
ignore-words-list = "atleast"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# disable warnings about binary files and wrong encoding
quiet-level = 3
[tool.isort]
profile = "black"
use_parentheses = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
ensure_newline_before_comments = true
line_length = 88
balanced_wrapping = true
indent = " "
skip = ["build"]
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(cc-clients LANGUAGES C CXX)
#
# Options
#
option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF)
option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF)
option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
option(TRITON_USE_THIRD_PARTY "Use local version of third party libraries" ON)
option(TRITON_KEEP_TYPEINFO "Keep typeinfo symbols by disabling ldscript" OFF)
option(TRITON_ENABLE_ZLIB "Include ZLIB library in build" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-common
GIT_REPOSITORY https://github.com/triton-inference-server/common.git
GIT_TAG ${TRITON_COMMON_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/9406a60c7839052e4944ea4dbc8344762a89f9bd.zip
)
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
set(TRITON_COMMON_ENABLE_PROTOBUF ON)
set(TRITON_COMMON_ENABLE_GRPC ON)
if(TRITON_ENABLE_PERF_ANALYZER)
FetchContent_Declare(
repo-core
GIT_REPOSITORY https://github.com/triton-inference-server/core.git
GIT_TAG ${TRITON_CORE_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-core)
endif() # TRITON_ENABLE_PERF_ANALYZER
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT TRITON_ENABLE_PERF_ANALYZER AND NOT TRITON_ENABLE_CC_HTTP AND NOT TRITON_ENABLE_EXAMPLES)
set(TRITON_COMMON_ENABLE_JSON OFF)
endif()
if(TRITON_ENABLE_TESTS OR TRITON_ENABLE_PERF_ANALYZER)
FetchContent_MakeAvailable(googletest)
endif()
FetchContent_MakeAvailable(repo-common)
if(TRITON_ENABLE_TESTS)
include_directories(
${repo-common_SOURCE_DIR}/include
)
endif() # TRITON_ENABLE_TESTS
#
# CUDA
#
if(TRITON_ENABLE_GPU)
find_package(CUDAToolkit REQUIRED)
endif() # TRITON_ENABLE_GPU
#
# libcurl
#
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
find_package(CURL REQUIRED)
message(STATUS "Using curl ${CURL_VERSION}")
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER
#
# Protobuf
#
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE)
find_package(Protobuf CONFIG REQUIRED)
message(STATUS "Using protobuf ${Protobuf_VERSION}")
include_directories(${Protobuf_INCLUDE_DIRS})
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
#
# GRPC
#
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
find_package(gRPC CONFIG REQUIRED)
message(STATUS "Using gRPC ${gRPC_VERSION}")
include_directories($<TARGET_PROPERTY:gRPC::grpc,INTERFACE_INCLUDE_DIRECTORIES>)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
add_subdirectory(library)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC)
if(TRITON_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif() # TRITON_ENABLE_EXAMPLES
if(TRITON_ENABLE_TESTS)
add_subdirectory(tests)
endif() # TRITON_ENABLE_TESTS
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_PERF_ANALYZER)
add_subdirectory(perf_analyzer)
endif() # TRITON_ENABLE_PERF_ANALYZER
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required (VERSION 3.18)
if(WIN32)
message("C++ examples are not currently supported on Windows because "
"they require functionalities that are UNIX specific.")
else()
if(TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC)
#
# yolov7-tiny
#
find_package(OpenCV REQUIRED)
add_executable(
yolov7-tiny
yolov7-tiny.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
yolov7-tiny
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
yolov7-tiny
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS yolov7-tiny
RUNTIME DESTINATION bin
)
#
# resnet50
#
find_package(OpenCV REQUIRED)
add_executable(
resnet50
resnet50.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
resnet50
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
resnet50
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS resnet50
RUNTIME DESTINATION bin
)
#
# image_client
#
find_package(OpenCV REQUIRED)
add_executable(
image_client
image_client.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
image_client
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
image_client
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS image_client
RUNTIME DESTINATION bin
)
#
# ensemble_image_client
#
add_executable(
ensemble_image_client
ensemble_image_client.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
ensemble_image_client
PRIVATE
grpcclient_static
httpclient_static
)
install(
TARGETS ensemble_image_client
RUNTIME DESTINATION bin
)
#
# reuse_infer_objects_client
#
add_executable(
reuse_infer_objects_client
reuse_infer_objects_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
reuse_infer_objects_client
PRIVATE
grpcclient_static
httpclient_static
)
install(
TARGETS reuse_infer_objects_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_CC_GRPC)
#
# simple_grpc_health_metadata
#
add_executable(simple_grpc_health_metadata simple_grpc_health_metadata.cc)
target_link_libraries(
simple_grpc_health_metadata
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_health_metadata
RUNTIME DESTINATION bin
)
#
# simple_grpc_model_control
#
add_executable(simple_grpc_model_control simple_grpc_model_control.cc)
target_link_libraries(
simple_grpc_model_control
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_model_control
RUNTIME DESTINATION bin
)
#
# simple_grpc_infer_client
#
add_executable(simple_grpc_infer_client simple_grpc_infer_client.cc)
target_link_libraries(
simple_grpc_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_keepalive_client
#
add_executable(simple_grpc_keepalive_client simple_grpc_keepalive_client.cc)
target_link_libraries(
simple_grpc_keepalive_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_keepalive_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_custom_args_client
#
add_executable(simple_grpc_custom_args_client simple_grpc_custom_args_client.cc)
target_link_libraries(
simple_grpc_custom_args_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_custom_args_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_string_infer_client
#
add_executable(simple_grpc_string_infer_client simple_grpc_string_infer_client.cc)
target_link_libraries(
simple_grpc_string_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_string_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_async_infer_client
#
add_executable(simple_grpc_async_infer_client simple_grpc_async_infer_client.cc)
target_link_libraries(
simple_grpc_async_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_async_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_sequence_stream_infer_client
#
add_executable(simple_grpc_sequence_stream_infer_client simple_grpc_sequence_stream_infer_client.cc)
target_link_libraries(
simple_grpc_sequence_stream_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_sequence_stream_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_sequence_sync_infer_client
#
add_executable(simple_grpc_sequence_sync_infer_client simple_grpc_sequence_sync_infer_client.cc)
target_link_libraries(
simple_grpc_sequence_sync_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_sequence_sync_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_shm_client
#
add_executable(
simple_grpc_shm_client
simple_grpc_shm_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
simple_grpc_shm_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_shm_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_custom_repeat
#
add_executable(simple_grpc_custom_repeat simple_grpc_custom_repeat.cc)
target_link_libraries(
simple_grpc_custom_repeat
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_custom_repeat
RUNTIME DESTINATION bin
)
if(${TRITON_ENABLE_GPU})
#
# simple_grpc_cudashm_client
#
set(
SIMPLE_GRPC_CUDA_SHM_SRCS
simple_grpc_cudashm_client.cc
)
set(
SIMPLE_GRPC_CUDA_SHM_HDRS
)
add_executable(simple_grpc_cudashm_client ${SIMPLE_GRPC_CUDA_SHM_SRCS} ${SIMPLE_GRPC_CUDA_SHM_HDRS})
target_include_directories(simple_grpc_cudashm_client PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(
simple_grpc_cudashm_client
PRIVATE
grpcclient_static
${CUDA_LIBRARIES}
)
install(
TARGETS simple_grpc_cudashm_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_GPU
endif() # TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_CC_HTTP)
#
# simple_http_health_metadata
#
add_executable(
simple_http_health_metadata
simple_http_health_metadata.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
simple_http_health_metadata
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_health_metadata
RUNTIME DESTINATION bin
)
#
# simple_http_model_control
#
add_executable(
simple_http_model_control
simple_http_model_control.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
simple_http_model_control
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_model_control
RUNTIME DESTINATION bin
)
#
# simple_http_infer_client
#
add_executable(simple_http_infer_client simple_http_infer_client.cc)
target_link_libraries(
simple_http_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_string_infer_client
#
add_executable(simple_http_string_infer_client simple_http_string_infer_client.cc)
target_link_libraries(
simple_http_string_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_string_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_async_infer_client
#
add_executable(simple_http_async_infer_client simple_http_async_infer_client.cc)
target_link_libraries(
simple_http_async_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_async_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_sequence_sync_infer_client
#
add_executable(simple_http_sequence_sync_infer_client simple_http_sequence_sync_infer_client.cc)
target_link_libraries(
simple_http_sequence_sync_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_sequence_sync_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_shm_client
#
add_executable(
simple_http_shm_client
simple_http_shm_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
simple_http_shm_client
PRIVATE
httpclient_static
rt
)
install(
TARGETS simple_http_shm_client
RUNTIME DESTINATION bin
)
if(${TRITON_ENABLE_GPU})
#
# simple_http_cudashm_client
#
set(
SIMPLE_HTTP_CUDA_SHM_SRCS
simple_http_cudashm_client.cc
)
set(
SIMPLE_HTTP_CUDA_SHM_HDRS
)
add_executable(simple_http_cudashm_client ${SIMPLE_HTTP_CUDA_SHM_SRCS} ${SIMPLE_HTTP_CUDA_SHM_HDRS})
target_include_directories(simple_http_cudashm_client PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(
simple_http_cudashm_client
PRIVATE
httpclient_static
${CUDA_LIBRARIES}
)
install(
TARGETS simple_http_cudashm_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_GPU
endif() # TRITON_ENABLE_CC_HTTP
endif() # WIN32
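#
# Adding another example (illustrative sketch, with a hypothetical
# my_client.cc): follow the same pattern as the targets above.
#   add_executable(my_client my_client.cc $<TARGET_OBJECTS:json-utils-library>)
#   target_link_libraries(my_client PRIVATE grpcclient_static httpclient_static)
#   install(TARGETS my_client RUNTIME DESTINATION bin)
#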
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
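// FAIL_IF_ERR evaluates a client call and, if the returned tc::Error is not
// OK, prints the given message and exits. Typical use later in this file:
//   FAIL_IF_ERR(input_ptr->Reset(), "unable to reset INPUT");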
namespace {
void
Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const size_t topk)
{
std::string output_name("OUTPUT");
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
// Get and validate the shape and datatype
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
// Validate shape
if ((shape.size() != 2) || (shape[0] != (int)batch_size) ||
(shape[1] != (int)topk)) {
std::cerr << "received incorrect shapes for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "received incorrect datatype for " << output_name << ": "
<< datatype << std::endl;
exit(1);
}
std::vector<std::string> result_data;
err = result->StringData(output_name, &result_data);
if (!err.IsOk()) {
std::cerr << "unable to get data for " << output_name << std::endl;
exit(1);
}
if (result_data.size() != (topk * batch_size)) {
std::cerr << "unexpected number of strings in the result, expected "
<< (topk * batch_size) << ", got " << result_data.size()
<< std::endl;
exit(1);
}
size_t index = 0;
for (size_t b = 0; b < batch_size; ++b) {
std::cout << "Image '" << filenames[b] << "':" << std::endl;
for (size_t c = 0; c < topk; ++c) {
std::istringstream is(result_data[index]);
int count = 0;
std::string token;
while (getline(is, token, ':')) {
if (count == 0) {
std::cout << " " << token;
} else if (count == 1) {
std::cout << " (" << token << ")";
} else if (count == 2) {
std::cout << " = " << token;
}
count++;
}
std::cout << std::endl;
index++;
}
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0]
<< " [options] <image filename / image folder>" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-c <topk>" << std::endl;
std::cerr << "\t-i <Protocol used to communicate with inference service>"
<< std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -c, the <topk> classes will be returned, default is 1."
<< std::endl;
std::cerr
<< "For -i, available protocols are 'grpc' and 'http'. Default is 'http."
<< std::endl;
exit(1);
}
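// TritonClient holds either an HTTP or a gRPC client in a union so the rest
// of the code can work with a single object. The constructor
// placement-constructs the HTTP member; which member is actually created and
// used is decided at runtime from the -i protocol flag.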
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
std::string protocol = "http";
size_t topk = 1;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vi:u:p:c:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'i':
protocol = optarg;
break;
case 'u':
url = optarg;
break;
case 'c':
topk = std::atoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
if (topk <= 0) {
Usage(argv, "topk must be > 0");
}
// The ensemble model takes 1 input tensor with shape [ 1 ] and STRING
// data type and returns 1 output tensor as top k (see '-c' flag)
// classification result of the input.
std::string model_name = "preprocess_inception_ensemble";
// Create the inference client for the model.
TritonClient triton_client;
tc::Error err;
if (protocol == "http") {
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
} else {
err = tc::InferenceServerGrpcClient::Create(
&triton_client.grpc_client_, url, verbose);
}
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err
<< std::endl;
exit(1);
}
if (optind >= argc) {
Usage(argv, "image file or image folder must be specified");
}
if (!err.IsOk()) {
std::cerr << "error: unable to create inference context: " << err
<< std::endl;
exit(1);
}
// Obtain a list of the image names to be processed
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(argv[optind], &name_stat) != 0) {
std::cerr << "Failed to find '" << std::string(argv[optind])
<< "': " << strerror(errno) << std::endl;
exit(1);
}
if (name_stat.st_mode & S_IFDIR) {
const std::string dirname = argv[optind];
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(argv[optind]);
}
// Sort the filenames so that we always visit them in the same order
// (readdir does not guarantee any particular order).
std::sort(image_filenames.begin(), image_filenames.end());
// Read the raw image as string
std::vector<std::vector<std::string>> images;
for (const auto& fn : image_filenames) {
images.emplace_back();
auto& image_str = images.back();
std::ifstream file(fn);
file >> std::noskipws;
image_str.emplace_back(
(std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
if (image_str.back().empty()) {
std::cerr << "error: unable to read image file " << fn << std::endl;
exit(1);
}
}
  // This client only sends one request for simplicity, so the maximum number
  // of images to be processed is limited by the model's maximum batch size.
size_t batch_size = 0;
if (protocol == "http") {
std::string model_config;
err = triton_client.http_client_->ModelConfig(&model_config, model_name);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
const auto bs_itr = model_config_json.FindMember("max_batch_size");
if (bs_itr != model_config_json.MemberEnd()) {
batch_size = bs_itr->value.GetInt();
}
} else {
inference::ModelConfigResponse model_config;
err = triton_client.grpc_client_->ModelConfig(&model_config, model_name);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
batch_size = model_config.config().max_batch_size();
}
if (images.size() > batch_size) {
std::cerr << "The number of images exceeds maximum batch size, only the"
<< " first " << batch_size << " images, sorted by name"
<< " alphabetically, will be processed" << std::endl;
}
batch_size = (images.size() < batch_size) ? images.size() : batch_size;
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{(int64_t)batch_size, 1};
err = tc::InferInput::Create(&input, "INPUT", shape, "BYTES");
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
  // Set the number of classifications expected
err = tc::InferRequestedOutput::Create(&output, "OUTPUT", topk);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
FAIL_IF_ERR(input_ptr->Reset(), "unable to reset INPUT");
for (size_t i = 0; i < batch_size; i++) {
FAIL_IF_ERR(
input_ptr->AppendFromString(images[i]), "unable to set data for INPUT");
}
// Send inference request to the inference server.
tc::InferResult* results;
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->Infer(&results, options, inputs, outputs),
"unable to run model");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->Infer(&results, options, inputs, outputs),
"unable to run model");
}
std::unique_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Print classification results
Postprocess(std::move(results_ptr), image_filenames, batch_size, topk);
return 0;
}
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <dirent.h>
#include <getopt.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <algorithm>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <opencv2/core/version.hpp>
#include <queue>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
#if CV_MAJOR_VERSION == 2
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#elif CV_MAJOR_VERSION >= 3
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#endif
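// Map color-conversion names to the cv::COLOR_* constants used by OpenCV 4;
// older OpenCV releases expose them as legacy CV_* constants instead.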
#if CV_MAJOR_VERSION == 4
#define GET_TRANSFORMATION_CODE(x) cv::COLOR_##x
#else
#define GET_TRANSFORMATION_CODE(x) CV_##x
#endif
namespace tc = triton::client;
namespace {
enum ScaleType { NONE = 0, VGG = 1, INCEPTION = 2 };
enum ProtocolType { HTTP = 0, GRPC = 1 };
struct ModelInfo {
std::string output_name_;
std::string input_name_;
std::string input_datatype_;
// The shape of the input
int input_c_;
int input_h_;
int input_w_;
// The format of the input
std::string input_format_;
int type1_;
int type3_;
int max_batch_size_;
};
void
Preprocess(
const cv::Mat& img, const std::string& format, int img_type1, int img_type3,
size_t img_channels, const cv::Size& img_size, const ScaleType scale,
std::vector<uint8_t>* input_data)
{
// Image channels are in BGR order. Currently model configuration
// data doesn't provide any information as to the expected channel
// orderings (like RGB, BGR). We are going to assume that RGB is the
// most likely ordering and so change the channels to that ordering.
cv::Mat sample;
if ((img.channels() == 3) && (img_channels == 1)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGR2GRAY));
} else if ((img.channels() == 4) && (img_channels == 1)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGRA2GRAY));
} else if ((img.channels() == 3) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGR2RGB));
} else if ((img.channels() == 4) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGRA2RGB));
} else if ((img.channels() == 1) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(GRAY2RGB));
} else {
std::cerr << "unexpected number of channels " << img.channels()
<< " in input image, model expects " << img_channels << "."
<< std::endl;
exit(1);
}
cv::Mat sample_resized;
if (sample.size() != img_size) {
cv::resize(sample, sample_resized, img_size);
} else {
sample_resized = sample;
}
cv::Mat sample_type;
sample_resized.convertTo(
sample_type, (img_channels == 3) ? img_type3 : img_type1);
cv::Mat sample_final;
if (scale == ScaleType::INCEPTION) {
if (img_channels == 1) {
sample_final = sample_type.mul(cv::Scalar(1 / 127.5));
sample_final = sample_final - cv::Scalar(1.0);
} else {
sample_final =
sample_type.mul(cv::Scalar(1 / 127.5, 1 / 127.5, 1 / 127.5));
sample_final = sample_final - cv::Scalar(1.0, 1.0, 1.0);
}
} else if (scale == ScaleType::VGG) {
if (img_channels == 1) {
sample_final = sample_type - cv::Scalar(128);
} else {
sample_final = sample_type - cv::Scalar(123, 117, 104);
}
} else {
sample_final = sample_type;
}
// Allocate a buffer to hold all image elements.
size_t img_byte_size = sample_final.total() * sample_final.elemSize();
size_t pos = 0;
input_data->resize(img_byte_size);
  // For NHWC format the Mat is already in the correct order, but we need to
  // handle both cases of the data being contiguous or not.
if (format.compare("FORMAT_NHWC") == 0) {
if (sample_final.isContinuous()) {
memcpy(&((*input_data)[0]), sample_final.datastart, img_byte_size);
pos = img_byte_size;
} else {
size_t row_byte_size = sample_final.cols * sample_final.elemSize();
for (int r = 0; r < sample_final.rows; ++r) {
memcpy(
&((*input_data)[pos]), sample_final.ptr<uint8_t>(r), row_byte_size);
pos += row_byte_size;
}
}
} else {
// (format.compare("FORMAT_NCHW") == 0)
//
    // For CHW formats we must split out each channel from the matrix and
    // order them as BBBB...GGGG...RRRR. To do this, split the channels
    // of the image directly into 'input_data'. The BGR channels are
    // backed by the 'input_data' vector, so the data ends up in CHW
    // order.
std::vector<cv::Mat> input_bgr_channels;
for (size_t i = 0; i < img_channels; ++i) {
input_bgr_channels.emplace_back(
img_size.height, img_size.width, img_type1, &((*input_data)[pos]));
pos += input_bgr_channels.back().total() *
input_bgr_channels.back().elemSize();
}
cv::split(sample_final, input_bgr_channels);
}
if (pos != img_byte_size) {
std::cerr << "unexpected total size of channels " << pos << ", expecting "
<< img_byte_size << std::endl;
exit(1);
}
}
void
Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const std::string& output_name, const size_t topk, const bool batching)
{
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
// Get and validate the shape and datatype
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
// Validate shape. Special handling for non-batch model
if (!batching) {
if ((shape.size() != 1) || (shape[0] != (int)topk)) {
std::cerr << "received incorrect shape for " << output_name << std::endl;
exit(1);
}
} else {
if ((shape.size() != 2) || (shape[0] != (int)batch_size) ||
(shape[1] != (int)topk)) {
std::cerr << "received incorrect shape for " << output_name << std::endl;
exit(1);
}
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "received incorrect datatype for " << output_name << ": "
<< datatype << std::endl;
exit(1);
}
std::vector<std::string> result_data;
err = result->StringData(output_name, &result_data);
if (!err.IsOk()) {
std::cerr << "unable to get data for " << output_name << std::endl;
exit(1);
}
if (result_data.size() != (topk * batch_size)) {
std::cerr << "unexpected number of strings in the result, expected "
<< (topk * batch_size) << ", got " << result_data.size()
<< std::endl;
exit(1);
}
size_t index = 0;
for (size_t b = 0; b < batch_size; ++b) {
std::cout << "Image '" << filenames[b] << "':" << std::endl;
for (size_t c = 0; c < topk; ++c) {
std::istringstream is(result_data[index]);
int count = 0;
std::string token;
while (getline(is, token, ':')) {
if (count == 0) {
std::cout << " " << token;
} else if (count == 1) {
std::cout << " (" << token << ")";
} else if (count == 2) {
std::cout << " = " << token;
}
count++;
}
std::cout << std::endl;
index++;
}
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0]
<< " [options] <image filename / image folder>" << std::endl;
std::cerr << " Note that image folder should only contain image files."
<< std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-a" << std::endl;
std::cerr << "\t--streaming" << std::endl;
std::cerr << "\t-b <batch size>" << std::endl;
std::cerr << "\t-c <topk>" << std::endl;
std::cerr << "\t-s <NONE|INCEPTION|VGG>" << std::endl;
std::cerr << "\t-p <preprocessed output filename>" << std::endl;
std::cerr << "\t-m <model name>" << std::endl;
std::cerr << "\t-x <model version>" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-i <Protocol used to communicate with inference service>"
<< std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr << "If -a is specified then asynchronous client API will be used. "
<< "Default is to use the synchronous API." << std::endl;
std::cerr << "The --streaming flag is only valid with gRPC protocol."
<< std::endl;
std::cerr
<< "For -b, a single image will be replicated and sent in a batch"
<< std::endl
<< " of the specified size. A directory of images will be grouped"
<< std::endl
<< " into batches. Default is 1." << std::endl;
std::cerr << "For -c, the <topk> classes will be returned, default is 1."
<< std::endl;
std::cerr << "For -s, specify the type of pre-processing scaling that"
<< std::endl
<< " should be performed on the image, default is NONE."
<< std::endl
<< " INCEPTION: scale each pixel RGB value to [-1.0, 1.0)."
<< std::endl
<< " VGG: subtract mean BGR value (123, 117, 104) from"
<< std::endl
<< " each pixel." << std::endl;
std::cerr
<< "If -x is not specified the most recent version (that is, the highest "
<< "numbered version) of the model will be used." << std::endl;
std::cerr << "For -p, it generates file only if image file is specified."
<< std::endl;
std::cerr << "For -u, the default server URL is localhost:8000." << std::endl;
std::cerr << "For -i, available protocols are gRPC and HTTP. Default is HTTP."
<< std::endl;
std::cerr
<< "For -H, the header will be added to HTTP requests (ignored for GRPC "
"requests). The header must be specified as 'Header:Value'. -H may be "
"specified multiple times to add multiple headers."
<< std::endl;
std::cerr << std::endl;
exit(1);
}
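// Example invocation (illustrative; placeholders rather than real paths):
//   image_client -m <model_name> -s INCEPTION -c 3 -b 2 <image_file_or_folder>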
ScaleType
ParseScale(const std::string& str)
{
if (str == "NONE") {
return ScaleType::NONE;
} else if (str == "INCEPTION") {
return ScaleType::INCEPTION;
} else if (str == "VGG") {
return ScaleType::VGG;
}
std::cerr << "unexpected scale type \"" << str
<< "\", expecting NONE, INCEPTION or VGG" << std::endl;
exit(1);
return ScaleType::NONE;
}
ProtocolType
ParseProtocol(const std::string& str)
{
std::string protocol(str);
std::transform(protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
if (protocol == "http") {
return ProtocolType::HTTP;
} else if (protocol == "grpc") {
return ProtocolType::GRPC;
}
std::cerr << "unexpected protocol type \"" << str
<< "\", expecting HTTP or gRPC" << std::endl;
exit(1);
return ProtocolType::HTTP;
}
bool
ParseType(const std::string& dtype, int* type1, int* type3)
{
if (dtype.compare("UINT8") == 0) {
*type1 = CV_8UC1;
*type3 = CV_8UC3;
} else if (dtype.compare("INT8") == 0) {
*type1 = CV_8SC1;
*type3 = CV_8SC3;
} else if (dtype.compare("UINT16") == 0) {
*type1 = CV_16UC1;
*type3 = CV_16UC3;
} else if (dtype.compare("INT16") == 0) {
*type1 = CV_16SC1;
*type3 = CV_16SC3;
} else if (dtype.compare("INT32") == 0) {
*type1 = CV_32SC1;
*type3 = CV_32SC3;
} else if (dtype.compare("FP32") == 0) {
*type1 = CV_32FC1;
*type3 = CV_32FC3;
} else if (dtype.compare("FP64") == 0) {
*type1 = CV_64FC1;
*type3 = CV_64FC3;
} else {
return false;
}
return true;
}
void
ParseModelGrpc(
const inference::ModelMetadataResponse& model_metadata,
const inference::ModelConfigResponse& model_config, const size_t batch_size,
ModelInfo* model_info)
{
if (model_metadata.inputs().size() != 1) {
std::cerr << "expecting 1 input, got " << model_metadata.inputs().size()
<< std::endl;
exit(1);
}
if (model_metadata.outputs().size() != 1) {
std::cerr << "expecting 1 output, got " << model_metadata.outputs().size()
<< std::endl;
exit(1);
}
if (model_config.config().input().size() != 1) {
std::cerr << "expecting 1 input in model configuration, got "
<< model_config.config().input().size() << std::endl;
exit(1);
}
auto input_metadata = model_metadata.inputs(0);
auto input_config = model_config.config().input(0);
auto output_metadata = model_metadata.outputs(0);
if (output_metadata.datatype().compare("FP32") != 0) {
std::cerr << "expecting output datatype to be FP32, model '"
<< model_metadata.name() << "' output type is '"
<< output_metadata.datatype() << "'" << std::endl;
exit(1);
}
model_info->max_batch_size_ = model_config.config().max_batch_size();
// Model specifying maximum batch size of 0 indicates that batching
// is not supported and so the input tensors do not expect a "N"
// dimension (and 'batch_size' should be 1 so that only a single
// image instance is inferred at a time).
if (model_info->max_batch_size_ == 0) {
if (batch_size != 1) {
std::cerr << "batching not supported for model \""
<< model_metadata.name() << "\"" << std::endl;
exit(1);
}
} else {
// model_info->max_batch_size_ > 0
if (batch_size > (size_t)model_info->max_batch_size_) {
std::cerr << "expecting batch size <= " << model_info->max_batch_size_
<< " for model '" << model_metadata.name() << "'" << std::endl;
exit(1);
}
}
  // Output is expected to be a vector. But allow any number of
  // dimensions as long as all but one dimension is of size 1
  // (e.g. { 10 }, { 1, 10 }, { 10, 1, 1 } are all ok).
bool output_batch_dim = (model_info->max_batch_size_ > 0);
size_t non_one_cnt = 0;
for (const auto dim : output_metadata.shape()) {
if (output_batch_dim) {
output_batch_dim = false;
} else if (dim == -1) {
std::cerr << "variable-size dimension in model output not supported"
<< std::endl;
exit(1);
} else if (dim > 1) {
non_one_cnt += 1;
if (non_one_cnt > 1) {
std::cerr << "expecting model output to be a vector" << std::endl;
exit(1);
}
}
}
  // Model input must have 3 dims, either CHW or HWC (not counting the
  // batch dimension).
const bool input_batch_dim = (model_info->max_batch_size_ > 0);
const int expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
if (input_metadata.shape().size() != expected_input_dims) {
std::cerr << "expecting input to have " << expected_input_dims
<< " dimensions, model '" << model_metadata.name()
<< "' input has " << input_metadata.shape().size() << std::endl;
exit(1);
}
if ((input_config.format() != inference::ModelInput::FORMAT_NCHW) &&
(input_config.format() != inference::ModelInput::FORMAT_NHWC)) {
std::cerr
<< "unexpected input format "
<< inference::ModelInput_Format_Name(input_config.format())
<< ", expecting "
<< inference::ModelInput_Format_Name(inference::ModelInput::FORMAT_NHWC)
<< " or "
<< inference::ModelInput_Format_Name(inference::ModelInput::FORMAT_NCHW)
<< std::endl;
exit(1);
}
model_info->output_name_ = output_metadata.name();
model_info->input_name_ = input_metadata.name();
model_info->input_datatype_ = input_metadata.datatype();
if (input_config.format() == inference::ModelInput::FORMAT_NHWC) {
model_info->input_format_ = "FORMAT_NHWC";
model_info->input_h_ = input_metadata.shape(input_batch_dim ? 1 : 0);
model_info->input_w_ = input_metadata.shape(input_batch_dim ? 2 : 1);
model_info->input_c_ = input_metadata.shape(input_batch_dim ? 3 : 2);
} else {
model_info->input_format_ = "FORMAT_NCHW";
model_info->input_c_ = input_metadata.shape(input_batch_dim ? 1 : 0);
model_info->input_h_ = input_metadata.shape(input_batch_dim ? 2 : 1);
model_info->input_w_ = input_metadata.shape(input_batch_dim ? 3 : 2);
}
if (!ParseType(
model_info->input_datatype_, &(model_info->type1_),
&(model_info->type3_))) {
std::cerr << "unexpected input datatype '" << model_info->input_datatype_
<< "' for model \"" << model_metadata.name() << std::endl;
exit(1);
}
}
void
ParseModelHttp(
const rapidjson::Document& model_metadata,
const rapidjson::Document& model_config, const size_t batch_size,
ModelInfo* model_info)
{
const auto& input_itr = model_metadata.FindMember("inputs");
size_t input_count = 0;
if (input_itr != model_metadata.MemberEnd()) {
input_count = input_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input, got " << input_count << std::endl;
exit(1);
}
const auto& output_itr = model_metadata.FindMember("outputs");
size_t output_count = 0;
if (output_itr != model_metadata.MemberEnd()) {
output_count = output_itr->value.Size();
}
if (output_count != 1) {
std::cerr << "expecting 1 output, got " << output_count << std::endl;
exit(1);
}
const auto& input_config_itr = model_config.FindMember("input");
input_count = 0;
if (input_config_itr != model_config.MemberEnd()) {
input_count = input_config_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input in model configuration, got " << input_count
<< std::endl;
exit(1);
}
const auto& input_metadata = *input_itr->value.Begin();
const auto& input_config = *input_config_itr->value.Begin();
const auto& output_metadata = *output_itr->value.Begin();
const auto& output_dtype_itr = output_metadata.FindMember("datatype");
if (output_dtype_itr == output_metadata.MemberEnd()) {
std::cerr << "output missing datatype in the metadata for model'"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
auto datatype = std::string(
output_dtype_itr->value.GetString(),
output_dtype_itr->value.GetStringLength());
if (datatype.compare("FP32") != 0) {
std::cerr << "expecting output datatype to be FP32, model '"
<< model_metadata["name"].GetString() << "' output type is '"
<< datatype << "'" << std::endl;
exit(1);
}
int max_batch_size = 0;
const auto bs_itr = model_config.FindMember("max_batch_size");
if (bs_itr != model_config.MemberEnd()) {
max_batch_size = bs_itr->value.GetUint();
}
model_info->max_batch_size_ = max_batch_size;
// Model specifying maximum batch size of 0 indicates that batching
// is not supported and so the input tensors do not expect a "N"
// dimension (and 'batch_size' should be 1 so that only a single
// image instance is inferred at a time).
if (max_batch_size == 0) {
if (batch_size != 1) {
std::cerr << "batching not supported for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
} else {
// max_batch_size > 0
if (batch_size > (size_t)max_batch_size) {
std::cerr << "expecting batch size <= " << max_batch_size
<< " for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
  // Output is expected to be a vector. But allow any number of
  // dimensions as long as all but one dimension is of size 1
  // (e.g. { 10 }, { 1, 10 }, { 10, 1, 1 } are all ok).
bool output_batch_dim = (max_batch_size > 0);
size_t non_one_cnt = 0;
const auto output_shape_itr = output_metadata.FindMember("shape");
if (output_shape_itr != output_metadata.MemberEnd()) {
const rapidjson::Value& shape_json = output_shape_itr->value;
for (rapidjson::SizeType i = 0; i < shape_json.Size(); i++) {
if (output_batch_dim) {
output_batch_dim = false;
} else if (shape_json[i].GetInt() == -1) {
std::cerr << "variable-size dimension in model output not supported"
<< std::endl;
exit(1);
} else if (shape_json[i].GetInt() > 1) {
non_one_cnt += 1;
if (non_one_cnt > 1) {
std::cerr << "expecting model output to be a vector" << std::endl;
exit(1);
}
}
}
} else {
std::cerr << "output missing shape in the metadata for model'"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
  // Model input must have 3 dims, either CHW or HWC (not counting the
  // batch dimension).
const bool input_batch_dim = (max_batch_size > 0);
const size_t expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
const auto input_shape_itr = input_metadata.FindMember("shape");
if (input_shape_itr != input_metadata.MemberEnd()) {
if (input_shape_itr->value.Size() != expected_input_dims) {
std::cerr << "expecting input to have " << expected_input_dims
<< " dimensions, model '" << model_metadata["name"].GetString()
<< "' input has " << input_shape_itr->value.Size() << std::endl;
exit(1);
}
} else {
std::cerr << "input missing shape in the metadata for model'"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
model_info->input_format_ = std::string(
input_config["format"].GetString(),
input_config["format"].GetStringLength());
if ((model_info->input_format_.compare("FORMAT_NCHW") != 0) &&
(model_info->input_format_.compare("FORMAT_NHWC") != 0)) {
std::cerr << "unexpected input format " << model_info->input_format_
<< ", expecting FORMAT_NCHW or FORMAT_NHWC" << std::endl;
exit(1);
}
model_info->output_name_ = std::string(
output_metadata["name"].GetString(),
output_metadata["name"].GetStringLength());
model_info->input_name_ = std::string(
input_metadata["name"].GetString(),
input_metadata["name"].GetStringLength());
model_info->input_datatype_ = std::string(
input_metadata["datatype"].GetString(),
input_metadata["datatype"].GetStringLength());
if (model_info->input_format_.compare("FORMAT_NHWC") == 0) {
model_info->input_h_ =
input_shape_itr->value[input_batch_dim ? 1 : 0].GetInt();
model_info->input_w_ =
input_shape_itr->value[input_batch_dim ? 2 : 1].GetInt();
model_info->input_c_ =
input_shape_itr->value[input_batch_dim ? 3 : 2].GetInt();
} else {
model_info->input_c_ =
input_shape_itr->value[input_batch_dim ? 1 : 0].GetInt();
model_info->input_h_ =
input_shape_itr->value[input_batch_dim ? 2 : 1].GetInt();
model_info->input_w_ =
input_shape_itr->value[input_batch_dim ? 3 : 2].GetInt();
}
if (!ParseType(
model_info->input_datatype_, &(model_info->type1_),
&(model_info->type3_))) {
std::cerr << "unexpected input datatype '" << model_info->input_datatype_
<< "' for model \"" << model_metadata["name"].GetString()
<< std::endl;
exit(1);
}
}
void
FileToInputData(
const std::string& filename, size_t c, size_t h, size_t w,
const std::string& format, int type1, int type3, ScaleType scale,
std::vector<uint8_t>* input_data)
{
// Load the specified image.
std::ifstream file(filename);
std::vector<char> data;
file >> std::noskipws;
std::copy(
std::istream_iterator<char>(file), std::istream_iterator<char>(),
std::back_inserter(data));
if (data.empty()) {
std::cerr << "error: unable to read image file " << filename << std::endl;
exit(1);
}
cv::Mat img = imdecode(cv::Mat(data), 1);
if (img.empty()) {
std::cerr << "error: unable to decode image " << filename << std::endl;
exit(1);
}
// Pre-process the image to match input size expected by the model.
Preprocess(img, format, type1, type3, c, cv::Size(w, h), scale, input_data);
}
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool async = false;
bool streaming = false;
int batch_size = 1;
int topk = 1;
ScaleType scale = ScaleType::NONE;
std::string preprocess_output_filename;
std::string model_name;
std::string model_version = "";
std::string url("localhost:8000");
ProtocolType protocol = ProtocolType::HTTP;
tc::Headers http_headers;
static struct option long_options[] = {{"streaming", 0, 0, 0}, {0, 0, 0, 0}};
// Parse commandline...
int opt;
while ((opt = getopt_long(
argc, argv, "vau:m:x:b:c:s:p:i:H:", long_options, NULL)) != -1) {
switch (opt) {
case 0:
streaming = true;
break;
case 'v':
verbose = true;
break;
case 'a':
async = true;
break;
case 'u':
url = optarg;
break;
case 'm':
model_name = optarg;
break;
case 'x':
model_version = optarg;
break;
case 'b':
batch_size = std::atoi(optarg);
break;
case 'c':
topk = std::atoi(optarg);
break;
case 's':
scale = ParseScale(optarg);
break;
case 'p':
preprocess_output_filename = optarg;
break;
case 'i':
protocol = ParseProtocol(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
if (model_name.empty()) {
Usage(argv, "-m flag must be specified");
}
if (batch_size <= 0) {
Usage(argv, "batch size must be > 0");
}
if (topk <= 0) {
Usage(argv, "topk must be > 0");
}
if (optind >= argc) {
Usage(argv, "image file or image folder must be specified");
}
if (streaming && (protocol != ProtocolType::GRPC)) {
Usage(argv, "Streaming is only allowed with gRPC protocol");
}
if (streaming && (!async)) {
Usage(argv, "Only async operation is supported in streaming");
}
if (!http_headers.empty() && (protocol != ProtocolType::HTTP)) {
std::cerr << "WARNING: HTTP headers specified with -H are ignored when "
"using non-HTTP protocol."
<< std::endl;
}
// Create the inference client for the server. From it
// extract and validate that the model meets the requirements for
// image classification.
TritonClient triton_client;
tc::Error err;
if (protocol == ProtocolType::HTTP) {
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
} else {
err = tc::InferenceServerGrpcClient::Create(
&triton_client.grpc_client_, url, verbose);
}
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err
<< std::endl;
exit(1);
}
ModelInfo model_info;
if (protocol == ProtocolType::HTTP) {
std::string model_metadata;
err = triton_client.http_client_->ModelMetadata(
&model_metadata, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model metadata: " << err << std::endl;
}
rapidjson::Document model_metadata_json;
err = tc::ParseJson(&model_metadata_json, model_metadata);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model metadata: " << err
<< std::endl;
}
std::string model_config;
err = triton_client.http_client_->ModelConfig(
&model_config, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
ParseModelHttp(
model_metadata_json, model_config_json, batch_size, &model_info);
} else {
inference::ModelMetadataResponse model_metadata;
err = triton_client.grpc_client_->ModelMetadata(
&model_metadata, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model metadata: " << err << std::endl;
}
inference::ModelConfigResponse model_config;
err = triton_client.grpc_client_->ModelConfig(
&model_config, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
ParseModelGrpc(model_metadata, model_config, batch_size, &model_info);
}
// Collect the names of the image(s).
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(argv[optind], &name_stat) != 0) {
std::cerr << "Failed to find '" << std::string(argv[optind])
<< "': " << strerror(errno) << std::endl;
exit(1);
}
if (name_stat.st_mode & S_IFDIR) {
const std::string dirname = argv[optind];
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(argv[optind]);
}
// Sort the filenames so that we always visit them in the same order
// (readdir does not guarantee any particular order).
std::sort(image_filenames.begin(), image_filenames.end());
// Preprocess the images into input data according to model
// requirements
std::vector<std::vector<uint8_t>> image_data;
for (const auto& fn : image_filenames) {
image_data.emplace_back();
FileToInputData(
fn, model_info.input_c_, model_info.input_h_, model_info.input_w_,
model_info.input_format_, model_info.type1_, model_info.type3_, scale,
&(image_data.back()));
if ((image_data.size() == 1) && !preprocess_output_filename.empty()) {
std::ofstream output_file(preprocess_output_filename);
std::ostream_iterator<uint8_t> output_iterator(output_file);
std::copy(image_data[0].begin(), image_data[0].end(), output_iterator);
}
}
std::vector<int64_t> shape;
// Include the batch dimension if required
if (model_info.max_batch_size_ != 0) {
shape.push_back(batch_size);
}
if (model_info.input_format_.compare("FORMAT_NHWC") == 0) {
shape.push_back(model_info.input_h_);
shape.push_back(model_info.input_w_);
shape.push_back(model_info.input_c_);
} else {
shape.push_back(model_info.input_c_);
shape.push_back(model_info.input_h_);
shape.push_back(model_info.input_w_);
}
// Initialize the inputs with the data.
tc::InferInput* input;
err = tc::InferInput::Create(
&input, model_info.input_name_, shape, model_info.input_datatype_);
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
  // Set the number of classifications expected
err =
tc::InferRequestedOutput::Create(&output, model_info.output_name_, topk);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
// Configure context for 'batch_size' and 'topk'
tc::InferOptions options(model_name);
options.model_version_ = model_version;
// Send requests of 'batch_size' images. If the number of images
// isn't an exact multiple of 'batch_size' then just start over with
// the first images until the batch is filled.
//
// Number of requests sent = ceil(number of images / batch_size)
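  // For example (illustrative): with 5 images and -b 2, three requests are
  // sent and the last batch is filled by wrapping around to the first image.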
std::vector<std::unique_ptr<tc::InferResult>> results;
std::vector<std::vector<std::string>> result_filenames;
size_t image_idx = 0;
size_t done_cnt = 0;
size_t sent_count = 0;
bool last_request = false;
std::mutex mtx;
std::condition_variable cv;
auto callback_func = [&](tc::InferResult* result) {
{
// Defer the response retrieval to main thread
std::lock_guard<std::mutex> lk(mtx);
results.emplace_back(result);
done_cnt++;
}
cv.notify_all();
};
if (streaming) {
err = triton_client.grpc_client_->StartStream(
callback_func, true /* enable_stats */, 0 /* stream_timeout */,
http_headers);
if (!err.IsOk()) {
std::cerr << "failed to establish the stream: " << err << std::endl;
}
}
while (!last_request) {
// Reset the input for new request.
err = input_ptr->Reset();
if (!err.IsOk()) {
std::cerr << "failed resetting input: " << err << std::endl;
exit(1);
}
// Set input to be the next 'batch_size' images (preprocessed).
std::vector<std::string> input_filenames;
for (int idx = 0; idx < batch_size; ++idx) {
input_filenames.push_back(image_filenames[image_idx]);
err = input_ptr->AppendRaw(image_data[image_idx]);
if (!err.IsOk()) {
std::cerr << "failed setting input: " << err << std::endl;
exit(1);
}
image_idx = (image_idx + 1) % image_data.size();
if (image_idx == 0) {
last_request = true;
}
}
result_filenames.emplace_back(std::move(input_filenames));
options.request_id_ = std::to_string(sent_count);
// Send request.
if (!async) {
tc::InferResult* result;
if (protocol == ProtocolType::HTTP) {
err = triton_client.http_client_->Infer(
&result, options, inputs, outputs, http_headers);
} else {
err = triton_client.grpc_client_->Infer(
&result, options, inputs, outputs, http_headers);
}
if (!err.IsOk()) {
std::cerr << "failed sending synchronous infer request: " << err
<< std::endl;
exit(1);
}
results.emplace_back(result);
} else {
if (streaming) {
err = triton_client.grpc_client_->AsyncStreamInfer(
options, inputs, outputs);
} else {
if (protocol == ProtocolType::HTTP) {
err = triton_client.http_client_->AsyncInfer(
callback_func, options, inputs, outputs, http_headers);
} else {
err = triton_client.grpc_client_->AsyncInfer(
callback_func, options, inputs, outputs, http_headers);
}
}
if (!err.IsOk()) {
std::cerr << "failed sending asynchronous infer request: " << err
<< std::endl;
exit(1);
}
}
sent_count++;
}
// For async, retrieve results according to the send order
if (async) {
// Wait until all callbacks are invoked
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() {
if (done_cnt >= sent_count) {
return true;
} else {
return false;
}
});
}
}
// Post-process the results to make prediction(s)
for (size_t idx = 0; idx < results.size(); idx++) {
std::cout << "Request " << idx << ", batch size " << batch_size
<< std::endl;
Postprocess(
std::move(results[idx]), result_filenames[idx], batch_size,
model_info.output_name_, topk, model_info.max_batch_size_ != 0);
}
return 0;
}
#include <dirent.h>
#include <getopt.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <algorithm>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <queue>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core/version.hpp>
#if CV_MAJOR_VERSION == 2
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#elif CV_MAJOR_VERSION >= 3
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#endif
#if CV_MAJOR_VERSION == 4
#define GET_TRANSFORMATION_CODE(x) cv::COLOR_##x
#else
#define GET_TRANSFORMATION_CODE(x) CV_##x
#endif
using namespace cv;
namespace tc = triton::client;
namespace {
enum ProtocolType { HTTP = 0, GRPC = 1 };
struct ModelInfo {
std::string output_name_;
std::string input_name_;
std::string input_datatype_;
int input_c_;
int input_h_;
int input_w_;
std::string input_format_;
int type1_;
int type3_;
int max_batch_size_;
};
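// Convert raw logits into probabilities with a numerically stable softmax
// (the maximum logit is subtracted before exponentiation).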
std::vector<float>
ComputeSoftmax(const std::vector<float>& results)
{
  std::vector<float> softmax_results(results.size());
  if (results.empty()) {
    return softmax_results;
  }
  // Subtract the maximum logit before exponentiation for numerical stability.
  const float max_value = *std::max_element(results.begin(), results.end());
  float sum = 0.0f;
  for (size_t i = 0; i < results.size(); ++i) {
    softmax_results[i] = exp(results[i] - max_value);
    sum += softmax_results[i];
  }
  for (size_t i = 0; i < results.size(); ++i) {
    softmax_results[i] /= sum;
  }
  return softmax_results;
}
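// Decode the image file, convert BGR to RGB, resize to the model's input
// size, apply ImageNet mean/std normalization, and write the result in
// planar (CHW) order into 'input_data'.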
void
Preprocess(
const std::string& filename, int img_type1, int img_type3, size_t img_channels,
const cv::Size& img_size, std::vector<uint8_t>* input_data)
{
cv::Mat img = cv::imread(filename, 1);
if (img.empty()) {
std::cerr << "error: unable to decode image " << filename << std::endl;
exit(1);
}
cv::Mat sample;
if ((img.channels() == 3) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGR2RGB));
} else {
std::cerr << "unexpected number of channels " << img.channels()
<< " in input image, model expects " << img_channels << "."
<< std::endl;
exit(1);
}
cv::Mat sample_resized;
cv::resize(sample, sample_resized, img_size);
cv::Mat sample_type;
sample_resized.convertTo(sample_type, (img_channels == 3) ? img_type3 : img_type1);
  cv::Mat sample_final;
  // Standard ImageNet normalization: subtract the per-channel mean, then
  // scale by the reciprocal of the per-channel standard deviation.
  sample_final = sample_type - cv::Scalar(123.675, 116.28, 103.53);
  sample_final = sample_final.mul(cv::Scalar(1 / 58.395, 1 / 57.12, 1 / 57.375));
size_t img_byte_size = sample_final.total() * sample_final.elemSize();
size_t pos = 0;
input_data->resize(img_byte_size);
std::vector<cv::Mat> input_bgr_channels;
for (size_t i = 0; i < img_channels; ++i) {
input_bgr_channels.emplace_back(img_size.height, img_size.width, img_type1, &((*input_data)[pos]));
pos += input_bgr_channels.back().total() * input_bgr_channels.back().elemSize();
}
cv::split(sample_final, input_bgr_channels);
if (pos != img_byte_size) {
std::cerr << "unexpected total size of channels " << pos << ", expecting "
<< img_byte_size << std::endl;
exit(1);
}
}
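// Interpret the raw FP32 output as class logits, apply softmax, and print
// every label whose confidence is at least 0.5.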
void Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const std::string& output_name, const bool batching)
{
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
// Get and validate the shape and datatype
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
const uint8_t* result_data;
size_t outputCount = 0;
err = result->RawData(output_name, &result_data, &outputCount);
if (!err.IsOk()) {
std::cerr << "unable to get data for " << output_name << std::endl;
exit(1);
}
  // The raw buffer holds FP32 logits; read it in place instead of copying
  // into a fixed-size heap allocation.
  const size_t num_classes = outputCount / sizeof(float);
  const float* pdata = reinterpret_cast<const float*>(result_data);
  std::vector<float> logits(pdata, pdata + num_classes);
  std::vector<float> probs = ComputeSoftmax(logits);
  for (size_t j = 0; j < probs.size(); ++j) {
    if (probs[j] >= 0.5) {
      fprintf(stdout, "label:%zu,confidence:%.3f\n", j, probs[j]);
    }
  }
}
bool ParseType(const std::string& dtype, int* type1, int* type3)
{
if (dtype.compare("UINT8") == 0) {
*type1 = CV_8UC1;
*type3 = CV_8UC3;
} else if (dtype.compare("INT8") == 0) {
*type1 = CV_8SC1;
*type3 = CV_8SC3;
} else if (dtype.compare("UINT16") == 0) {
*type1 = CV_16UC1;
*type3 = CV_16UC3;
} else if (dtype.compare("INT16") == 0) {
*type1 = CV_16SC1;
*type3 = CV_16SC3;
} else if (dtype.compare("INT32") == 0) {
*type1 = CV_32SC1;
*type3 = CV_32SC3;
} else if (dtype.compare("FP32") == 0) {
*type1 = CV_32FC1;
*type3 = CV_32FC3;
} else if (dtype.compare("FP64") == 0) {
*type1 = CV_64FC1;
*type3 = CV_64FC3;
} else {
return false;
}
return true;
}
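// Extract the input/output names, datatype, layout, and dimensions this
// client needs from the model metadata and configuration returned over HTTP.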
void ParseModelHttp(
const rapidjson::Document& model_metadata,
const rapidjson::Document& model_config, const size_t batch_size,
ModelInfo* model_info)
{
const auto& input_itr = model_metadata.FindMember("inputs");
size_t input_count = 0;
if (input_itr != model_metadata.MemberEnd()) {
input_count = input_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input, got " << input_count << std::endl;
exit(1);
}
const auto& output_itr = model_metadata.FindMember("outputs");
size_t output_count = 0;
if (output_itr != model_metadata.MemberEnd()) {
output_count = output_itr->value.Size();
}
if (output_count != 1) {
std::cerr << "expecting 1 output, got " << output_count << std::endl;
exit(1);
}
const auto& input_config_itr = model_config.FindMember("input");
input_count = 0;
if (input_config_itr != model_config.MemberEnd()) {
input_count = input_config_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input in model configuration, got " << input_count
<< std::endl;
exit(1);
}
const auto& input_metadata = *input_itr->value.Begin();
const auto& input_config = *input_config_itr->value.Begin();
const auto& output_metadata = *output_itr->value.Begin();
const auto& output_dtype_itr = output_metadata.FindMember("datatype");
if (output_dtype_itr == output_metadata.MemberEnd()) {
std::cerr << "output missing datatype in the metadata for model'"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
auto datatype = std::string(output_dtype_itr->value.GetString(),
output_dtype_itr->value.GetStringLength());
if (datatype.compare("FP32") != 0) {
std::cerr << "expecting output datatype to be FP32, model '"
<< model_metadata["name"].GetString() << "' output type is '"
<< datatype << "'" << std::endl;
exit(1);
}
int max_batch_size = 0;
const auto bs_itr = model_config.FindMember("max_batch_size");
if (bs_itr != model_config.MemberEnd()) {
max_batch_size = bs_itr->value.GetUint();
}
model_info->max_batch_size_ = max_batch_size;
if (max_batch_size == 0) {
if (batch_size != 1) {
std::cerr << "batching not supported for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
} else {
if (batch_size > (size_t)max_batch_size) {
std::cerr << "expecting batch size <= " << max_batch_size
<< " for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
const bool input_batch_dim = (max_batch_size == 0);
const size_t expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
const auto input_shape_itr = input_metadata.FindMember("shape");
  model_info->input_format_ = std::string(
      input_config["format"].GetString(),
      input_config["format"].GetStringLength());
  model_info->output_name_ = std::string(
      output_metadata["name"].GetString(),
      output_metadata["name"].GetStringLength());
  model_info->input_name_ = std::string(
      input_metadata["name"].GetString(),
      input_metadata["name"].GetStringLength());
  model_info->input_datatype_ = std::string(
      input_metadata["datatype"].GetString(),
      input_metadata["datatype"].GetStringLength());
model_info->input_c_ = input_shape_itr->value[1].GetInt();
model_info->input_h_ = input_shape_itr->value[2].GetInt();
model_info->input_w_ = input_shape_itr->value[3].GetInt();
  if (!ParseType(
          model_info->input_datatype_, &(model_info->type1_),
          &(model_info->type3_))) {
    std::cerr << "unexpected input datatype '" << model_info->input_datatype_
              << "' for model '" << model_metadata["name"].GetString() << "'"
              << std::endl;
    exit(1);
  }
}
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
}
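// Entry point for the image-classification client: query the model's
// metadata and configuration over HTTP, preprocess the input image(s) with
// OpenCV, send synchronous inference requests, and post-process the results.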
int
main(int argc, char** argv)
{
bool verbose = false;
bool async = false;
int batch_size = 1;
  if (argc != 3) {
    fprintf(stderr, "Usage: %s <model_name> <image_path>\n", argv[0]);
    return -1;
  }
std::string model_name = argv[1];
std::string fileName = argv[2];
std::string preprocess_output_filename;
std::string model_version = "";
std::string url("localhost:8000");
ProtocolType protocol = ProtocolType::HTTP;
tc::Headers http_headers;
TritonClient triton_client;
tc::Error err;
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err << std::endl;
exit(1);
}
ModelInfo model_info;
  std::string model_metadata;
  err = triton_client.http_client_->ModelMetadata(
      &model_metadata, model_name, model_version, http_headers);
  if (!err.IsOk()) {
    std::cerr << "error: failed to get model metadata: " << err << std::endl;
    exit(1);
  }
  rapidjson::Document model_metadata_json;
  err = tc::ParseJson(&model_metadata_json, model_metadata);
  if (!err.IsOk()) {
    std::cerr << "error: failed to parse model metadata: " << err
              << std::endl;
    exit(1);
  }
  std::string model_config;
  err = triton_client.http_client_->ModelConfig(
      &model_config, model_name, model_version, http_headers);
  if (!err.IsOk()) {
    std::cerr << "error: failed to get model config: " << err << std::endl;
    exit(1);
  }
  rapidjson::Document model_config_json;
  err = tc::ParseJson(&model_config_json, model_config);
  if (!err.IsOk()) {
    std::cerr << "error: failed to parse model config: " << err << std::endl;
    exit(1);
  }
  ParseModelHttp(
      model_metadata_json, model_config_json, batch_size, &model_info);
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(fileName.c_str(), &name_stat) != 0) {
std::cerr << "Failed to find '" << fileName << "': " << strerror(errno) << std::endl;
exit(1);
}
  if (S_ISDIR(name_stat.st_mode)) {
const std::string dirname = fileName;
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(fileName);
}
std::sort(image_filenames.begin(), image_filenames.end());
std::vector<std::vector<uint8_t>> image_data;
for (const auto& fn : image_filenames) {
image_data.emplace_back();
Preprocess(fn, model_info.type1_, model_info.type3_, model_info.input_c_,
cv::Size(model_info.input_w_, model_info.input_h_), &(image_data.back()));
if ((image_data.size() == 1) && !preprocess_output_filename.empty()) {
std::ofstream output_file(preprocess_output_filename);
std::ostream_iterator<uint8_t> output_iterator(output_file);
std::copy(image_data[0].begin(), image_data[0].end(), output_iterator);
}
}
std::vector<int64_t> shape;
shape.push_back(batch_size);
shape.push_back(model_info.input_c_);
shape.push_back(model_info.input_h_);
shape.push_back(model_info.input_w_);
tc::InferInput* input;
err = tc::InferInput::Create(&input, model_info.input_name_, shape, model_info.input_datatype_);
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
err = tc::InferRequestedOutput::Create(&output, model_info.output_name_);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<std::unique_ptr<tc::InferResult>> results;
std::vector<std::vector<std::string>> result_filenames;
size_t image_idx = 0;
size_t done_cnt = 0;
size_t sent_count = 0;
bool last_request = false;
std::mutex mtx;
std::condition_variable cv;
auto callback_func = [&](tc::InferResult* result)
{
{
std::lock_guard<std::mutex> lk(mtx);
results.emplace_back(result);
done_cnt++;
}
cv.notify_all();
};
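  // NOTE: callback_func and the 'async' flag above are currently unused; the
  // loop below issues synchronous Infer() requests and stores each result
  // directly.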
while (!last_request) {
err = input_ptr->Reset();
if (!err.IsOk()) {
std::cerr << "failed resetting input: " << err << std::endl;
exit(1);
}
std::vector<std::string> input_filenames;
for (int idx = 0; idx < batch_size; ++idx) {
input_filenames.push_back(image_filenames[image_idx]);
err = input_ptr->AppendRaw(image_data[image_idx]);
if (!err.IsOk()) {
std::cerr << "failed setting input: " << err << std::endl;
exit(1);
}
image_idx = (image_idx + 1) % image_data.size();
if (image_idx == 0) {
last_request = true;
}
}
result_filenames.emplace_back(std::move(input_filenames));
options.request_id_ = std::to_string(sent_count);
    double time1 = cv::getTickCount();
tc::InferResult* result;
if (protocol == ProtocolType::HTTP) {
err = triton_client.http_client_->Infer(
&result, options, inputs, outputs, http_headers);
} else {
err = triton_client.grpc_client_->Infer(
&result, options, inputs, outputs, http_headers);
}
if (!err.IsOk()) {
std::cerr << "failed sending synchronous infer request: " << err
<< std::endl;
exit(1);
}
results.emplace_back(result);
    double time2 = cv::getTickCount();
    double elapsedTime = (time2 - time1) * 1000 / cv::getTickFrequency();
    fprintf(stdout, "inference time:%f ms\n", elapsedTime);
sent_count++;
}
for (size_t idx = 0; idx < results.size(); idx++) {
std::cout << "Request " << idx << ", batch size " << batch_size << std::endl;
Postprocess(
std::move(results[idx]), result_filenames[idx], batch_size,
model_info.output_name_, model_info.max_batch_size_ != 0);
}
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
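// InferAndValidate runs one inference reusing the same InferInput and
// InferRequestedOutput objects, backed either by the registered system
// shared-memory regions or by freshly appended raw data, and then verifies
// that OUTPUT0 = INPUT0 + INPUT1 and OUTPUT1 = INPUT0 - INPUT1.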
void
InferAndValidate(
const bool use_shared_memory, TritonClient& triton_client,
const std::string& protocol, const tc::InferOptions& options,
const tc::Headers& http_headers, std::vector<tc::InferInput*>& inputs,
const size_t input_byte_size,
std::vector<tc::InferRequestedOutput*>& outputs,
const size_t output_byte_size, std::vector<int*>& shm_ptrs)
{
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
int32_t* input0_data_ptr;
int32_t* input1_data_ptr;
int32_t* output0_data_ptr;
int32_t* output1_data_ptr;
FAIL_IF_ERR(inputs[0]->Reset(), "unable to reset input 'INPUT0'");
FAIL_IF_ERR(inputs[1]->Reset(), "unable to reset input 'INPUT1'");
if (use_shared_memory) {
input0_data_ptr = shm_ptrs[0];
input1_data_ptr = shm_ptrs[1];
FAIL_IF_ERR(
inputs[0]->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
inputs[1]->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
FAIL_IF_ERR(
outputs[0]->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
outputs[1]->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
} else {
input0_data_ptr = &input0_data[0];
input1_data_ptr = &input1_data[0];
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all twos. We use twos instead
// of ones in input1_data to validate whether inputs were set correctly.
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 2;
}
FAIL_IF_ERR(
inputs[0]->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for 'INPUT0'");
FAIL_IF_ERR(
inputs[1]->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for 'INPUT1'");
FAIL_IF_ERR(
outputs[0]->UnsetSharedMemory(),
"unable to unset shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
outputs[1]->UnsetSharedMemory(),
"unable to unset shared memory for 'OUTPUT1'");
}
std::vector<const tc::InferRequestedOutput*> routputs = {
outputs[0], outputs[1]};
tc::InferResult* results;
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->Infer(
&results, options, inputs, routputs, http_headers),
"unable to run model");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->Infer(
&results, options, inputs, routputs, http_headers),
"unable to run model");
}
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
if (use_shared_memory) {
std::cout << "\n\n======== SHARED_MEMORY ========\n";
output0_data_ptr = shm_ptrs[2];
output1_data_ptr = shm_ptrs[3];
} else {
std::cout << "\n\n======== NO_SHARED_MEMORY ========\n";
// Get pointers to the result returned...
size_t recv_output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data_ptr,
&recv_output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (recv_output0_byte_size != output_byte_size) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< recv_output0_byte_size << std::endl;
exit(1);
}
size_t recv_output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data_ptr,
&recv_output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (recv_output1_byte_size != output_byte_size) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< recv_output1_byte_size << std::endl;
exit(1);
}
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data_ptr[i] << " + " << input1_data_ptr[i] << " = "
<< output0_data_ptr[i] << std::endl;
std::cout << input0_data_ptr[i] << " - " << input1_data_ptr[i] << " = "
<< output1_data_ptr[i] << std::endl;
if ((input0_data_ptr[i] + input1_data_ptr[i]) != output0_data_ptr[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data_ptr[i] - input1_data_ptr[i]) != output1_data_ptr[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
std::cout << "\n======== END ========\n\n";
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
// Tests whether the same InferInput and InferRequestedOutput objects can be
// successfully used repeatedly for different inferences using/not-using
// shared memory.
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
bool url_specified = false;
tc::Headers http_headers;
std::string protocol("http");
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:i:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
url_specified = true;
break;
case 'i':
protocol = optarg;
std::transform(
protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create the inference client for the server using the requested protocol.
TritonClient triton_client;
tc::Error err;
if (protocol == "http") {
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
} else if (protocol == "grpc") {
if (!url_specified) {
url = "localhost:8001";
}
err = tc::InferenceServerGrpcClient::Create(
&triton_client.grpc_client_, url, verbose);
} else {
std::cerr
<< "error: unsupported protocol provided: only supports grpc or http."
<< std::endl;
exit(1);
}
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err
<< std::endl;
exit(1);
}
// Unregistering all shared memory regions for a clean
// start.
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
triton_client.http_client_->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
}
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
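  // Each tensor holds 16 INT32 elements, i.e. 64 bytes.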
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
// integers and Input1 to all ones.
std::string shm_key = "/input_simple";
int shm_fd_ip, *input0_shm;
FAIL_IF_ERR(
tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
"");
FAIL_IF_ERR(
tc::MapSharedMemory(
shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
"");
FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_ip), "");
int* input1_shm = (int*)(input0_shm + 16);
for (size_t i = 0; i < 16; ++i) {
*(input0_shm + i) = i;
*(input1_shm + i) = 1;
}
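  // Both inputs live in a single region ("/input_simple"); INPUT1 starts at
  // byte offset input_byte_size, matching the offsets passed to
  // SetSharedMemory() in InferAndValidate().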
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
}
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in Shared Memory
shm_key = "/output_simple";
int shm_fd_op;
int* output0_shm;
FAIL_IF_ERR(
tc::CreateSharedMemoryRegion(shm_key, output_byte_size * 2, &shm_fd_op),
"");
FAIL_IF_ERR(
tc::MapSharedMemory(
shm_fd_op, 0, output_byte_size * 2, (void**)&output0_shm),
"");
FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_op), "");
int* output1_shm = (int*)(output0_shm + 16);
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
}
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
std::vector<int*> shm_ptrs = {
input0_shm, input1_shm, output0_shm, output1_shm};
  // The inference settings; use the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
// Issue inference using shared memory
InferAndValidate(
true /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Issue inference without using shared memory
InferAndValidate(
false /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Issue inference using shared memory
InferAndValidate(
true /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Unregister shared memory
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
}
// Cleanup shared memory
FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
FAIL_IF_ERR(tc::UnmapSharedMemory(output0_shm, output_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/output_simple"), "");
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
ValidateResult(
const std::shared_ptr<tc::InferResult> result,
std::vector<int32_t>& input0_data, std::vector<int32_t>& input1_data)
{
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", result);
ValidateShapeAndDatatype("OUTPUT1", result);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
  if (output1_byte_size != 64) {
    std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
              << output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << result->DebugString() << std::endl;
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
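// main sends repeat_cnt asynchronous requests whose callbacks validate the
// results inline, then one more request whose completed result is deferred to
// the main thread, and finally prints the client-side inference statistics.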
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
uint32_t client_timeout = 0;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:t:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
  // The inference settings; use the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
// Send inference request to the inference server.
std::mutex mtx;
std::condition_variable cv;
size_t repeat_cnt = 2;
size_t done_cnt = 0;
for (size_t i = 0; i < repeat_cnt; i++) {
FAIL_IF_ERR(
client->AsyncInfer(
[&, i](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
std::lock_guard<std::mutex> lk(mtx);
std::cout << "Callback no." << i << " is called" << std::endl;
done_cnt++;
if (result_ptr->RequestStatus().IsOk()) {
ValidateResult(result_ptr, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_ptr->RequestStatus() << std::endl;
exit(1);
}
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
}
// Wait until all callbacks are invoked
{
std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk, [&]() { return done_cnt >= repeat_cnt; });
}
if (done_cnt == repeat_cnt) {
std::cout << "All done" << std::endl;
} else {
std::cerr << "Done cnt: " << done_cnt
<< " does not match repeat cnt: " << repeat_cnt << std::endl;
exit(1);
}
// Send another AsyncInfer whose callback defers the completed request
// to another thread (main thread) to handle
bool callback_invoked = false;
std::shared_ptr<tc::InferResult> result_placeholder;
FAIL_IF_ERR(
client->AsyncInfer(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
// Defer the response retrieval to main thread
std::lock_guard<std::mutex> lk(mtx);
callback_invoked = true;
result_placeholder = std::move(result_ptr);
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
// Ensure callback is completed
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() { return callback_invoked; });
}
// Get deferred response
std::cout << "Getting results from deferred response" << std::endl;
if (result_placeholder->RequestStatus().IsOk()) {
ValidateResult(result_placeholder, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_placeholder->RequestStatus() << std::endl;
exit(1);
}
tc::InferStat infer_stat;
client->ClientInferStat(&infer_stat);
std::cout << "completed_request_count " << infer_stat.completed_request_count
<< std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns " << infer_stat.cumulative_send_time_ns
<< std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
std::cout << "PASS : Async Infer" << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cuda_runtime_api.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
#define FAIL_IF_CUDA_ERR(FUNC) \
{ \
const cudaError_t result = FUNC; \
if (result != cudaSuccess) { \
std::cerr << "CUDA exception (line " << __LINE__ \
<< "): " << cudaGetErrorName(result) << " (" \
<< cudaGetErrorString(result) << ")" << std::endl; \
exit(1); \
} \
}
void
CreateCUDAIPCHandle(
cudaIpcMemHandle_t* cuda_handle, void* input_d_ptr, int device_id = 0)
{
// Set the GPU device to the desired GPU
FAIL_IF_CUDA_ERR(cudaSetDevice(device_id));
// Create IPC handle for data on the gpu
FAIL_IF_CUDA_ERR(cudaIpcGetMemHandle(cuda_handle, input_d_ptr));
}
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in CUDA Shared Memory. Initialize Input0 to
// unique integers and Input1 to all ones.
int input_data[32];
for (size_t i = 0; i < 16; ++i) {
input_data[i] = i;
input_data[16 + i] = 1;
}
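  // INPUT0 occupies the first 16 ints of the host buffer and INPUT1 the next
  // 16, matching the byte offsets used with SetSharedMemory() below.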
// copy INPUT0 and INPUT1 data in GPU shared memory
int* input_d_ptr;
  FAIL_IF_CUDA_ERR(cudaMalloc((void**)&input_d_ptr, input_byte_size * 2));
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      (void*)input_d_ptr, (void*)input_data, input_byte_size * 2,
      cudaMemcpyHostToDevice));
cudaIpcMemHandle_t input_cuda_handle;
CreateCUDAIPCHandle(&input_cuda_handle, (void*)input_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"input_data", input_cuda_handle, 0 /* device_id */,
input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in CUDA Shared Memory
int *output0_d_ptr, *output1_d_ptr;
  FAIL_IF_CUDA_ERR(cudaMalloc((void**)&output0_d_ptr, output_byte_size * 2));
output1_d_ptr = (int*)output0_d_ptr + 16;
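  // OUTPUT1 occupies the second half of the device allocation, i.e. byte
  // offset output_byte_size.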
cudaIpcMemHandle_t output_cuda_handle;
CreateCUDAIPCHandle(&output_cuda_handle, (void*)output0_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"output_data", output_cuda_handle, 0 /* device_id */,
output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
  // The inference settings; use the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Copy input and output data back to the CPU
int output0_data[16], output1_data[16];
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      output0_data, output0_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      output1_data, output1_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
for (size_t i = 0; i < 16; ++i) {
std::cout << input_data[i] << " + " << input_data[16 + i] << " = "
<< output0_data[i] << std::endl;
    std::cout << input_data[i] << " - " << input_data[16 + i] << " = "
<< output1_data[i] << std::endl;
if ((input_data[i] + input_data[16 + i]) != output0_data[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input_data[i] - input_data[16 + i]) != output1_data[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get shared memory regions active/registered within triton
inference::CudaSharedMemoryStatusResponse status;
FAIL_IF_ERR(
client->CudaSharedMemoryStatus(&status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << status.DebugString() << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Free GPU memory
FAIL_IF_CUDA_ERR(cudaFree(input_d_ptr));
FAIL_IF_CUDA_ERR(cudaFree(output0_d_ptr));
std::cout << "PASS : Cuda Shared Memory " << std::endl;
return 0;
}
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <getopt.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-m <model name>" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
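// main configures custom grpc::ChannelArguments (message-size limits,
// keepalive settings), creates the gRPC client with them, and runs a
// synchronous inference against the "simple" model.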
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
uint32_t client_timeout = 0;
bool use_ssl = false;
tc::SslOptions ssl_options;
grpc::ChannelArguments channel_args;
// Set any valid grpc::ChannelArguments here based on use case
channel_args.SetMaxSendMessageSize(1024 * 1024);
channel_args.SetMaxReceiveMessageSize(1024 * 1024);
// Setting KeepAlive options using new generic channel arguments option
// https://grpc.github.io/grpc/cpp/md_doc_keepalive.html
channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, INT_MAX);
channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, 20000);
channel_args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, false);
channel_args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 2);
// Example arg requested for the feature
channel_args.SetInt(GRPC_ARG_DNS_ENABLE_SRV_QUERIES, 1);
// Parse commandline...
int opt;
while ((opt = getopt_long(argc, argv, "vu:t:H:C:", NULL, NULL)) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(
&client, url, channel_args, verbose, use_ssl, ssl_options),
"unable to create grpc client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
  // The inference settings; use the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (output1_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
std::cout << "PASS : CustomArgs" << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <atomic>
#include <condition_variable>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "grpc_client.h"
namespace tc = triton::client;
using ResultMap =
std::map<std::string, std::vector<std::shared_ptr<tc::InferResult>>>;
using ResultList = std::vector<std::shared_ptr<tc::InferResult>>;
// Global mutex to synchronize the threads
std::mutex mutex_;
std::condition_variable cv_;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service and its gRPC port>"
<< std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-r <the number of inference requests>" << std::endl;
std::cerr << "\t-s <the number of inference response to generate per request>"
<< std::endl;
std::cerr << "\t-o <data offset>" << std::endl;
std::cerr << "\t-d <delay time between each response>" << std::endl;
std::cerr << "\t-w <wait time before releasing the request>" << std::endl;
exit(1);
}
} // namespace
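// main streams request_count requests to the decoupled "repeat_int32" model
// and verifies that each request produces repeat_count responses with the
// expected values.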
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
int request_count = 1;
int repeat_count = 1;
int data_offset = 100;
uint32_t delay_time = 1000;
uint32_t wait_time = 500;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:r:s:o:d:w:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 'r':
request_count = std::stoi(optarg);
break;
case 's':
repeat_count = std::stoi(optarg);
break;
case 'o':
data_offset = std::stoi(optarg);
break;
case 'd':
delay_time = std::stoi(optarg);
break;
case 'w':
wait_time = std::stoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
tc::Error err;
// We use the custom "repeat_int32" model which takes 3 inputs and
// 1 output. For a single request the model will generate 'repeat_count'
  // responses. See src/backends/backend/examples/repeat.cc.
std::string model_name = "repeat_int32";
std::atomic<int32_t> received_response(0);
  // Create an InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
ResultMap result_map;
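  // Responses are grouped by request id; a single decoupled request may
  // produce multiple responses.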
  // Note that client-side statistics should be disabled in the case of a
  // decoupled model.
FAIL_IF_ERR(
client->StartStream(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr(result);
std::lock_guard<std::mutex> lk(mutex_);
std::string request_id;
result->Id(&request_id);
auto it = result_map.find(request_id);
if (it == result_map.end()) {
result_map[request_id] = ResultList();
}
result_map[request_id].push_back(result_ptr);
received_response++;
}
cv_.notify_all();
},
false /*enable_stats*/, 0 /* stream_timeout */, http_headers),
"unable to establish a streaming connection to server");
// Prepare the data for the tensors
std::vector<int32_t> in_data;
std::vector<uint32_t> delay_data;
std::vector<uint32_t> wait_data;
for (int i = 0; i < repeat_count; i++) {
in_data.push_back(data_offset + i);
delay_data.push_back(delay_time);
}
wait_data.push_back(wait_time);
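  // Per the option descriptions in Usage(): IN carries 'repeat_count' values
  // starting at 'data_offset', DELAY carries the delay between responses, and
  // WAIT (a single value) the wait time before the request is released.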
// Initialize the inputs with the data.
tc::InferInput* in;
std::vector<int64_t> shape{repeat_count};
FAIL_IF_ERR(
tc::InferInput::Create(&in, "IN", shape, "INT32"),
"unable to create 'IN'");
std::shared_ptr<tc::InferInput> in_ptr(in);
FAIL_IF_ERR(in_ptr->Reset(), "unable to reset 'IN'");
FAIL_IF_ERR(
in_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&in_data[0]),
sizeof(int32_t) * repeat_count),
"unable to set data for 'IN'");
tc::InferInput* delay;
FAIL_IF_ERR(
tc::InferInput::Create(&delay, "DELAY", shape, "UINT32"),
"unable to create 'DELAY'");
std::shared_ptr<tc::InferInput> delay_ptr(delay);
FAIL_IF_ERR(delay_ptr->Reset(), "unable to reset 'DELAY'");
FAIL_IF_ERR(
delay_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&delay_data[0]),
sizeof(uint32_t) * repeat_count),
"unable to set data for 'DELAY'");
tc::InferInput* wait;
shape[0] = 1;
FAIL_IF_ERR(
tc::InferInput::Create(&wait, "WAIT", shape, "UINT32"),
"unable to create 'WAIT'");
std::shared_ptr<tc::InferInput> wait_ptr(wait);
FAIL_IF_ERR(wait_ptr->Reset(), "unable to reset 'WAIT'");
FAIL_IF_ERR(
wait_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&wait_data[0]), sizeof(uint32_t)),
"unable to set data for 'WAIT'");
std::vector<tc::InferInput*> inputs = {
in_ptr.get(), delay_ptr.get(), wait_ptr.get()};
tc::InferOptions options(model_name);
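  // All requests go over the same stream; each gets a unique request_id_ so
  // the stream callback can group the responses it receives per request.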
for (int id = 0; id < request_count; id++) {
options.request_id_ = std::to_string(id);
// Send inference request to the inference server.
FAIL_IF_ERR(
client->AsyncStreamInfer(options, inputs), "unable to run model");
}
// Wait until all callbacks are invoked
{
std::unique_lock<std::mutex> lk(mutex_);
    cv_.wait(lk, [&]() {
      return received_response >= (repeat_count * request_count);
    });
}
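  // Verify the collected responses: with repeat_count == 0 no request id
  // should appear in the map; otherwise each request id must have exactly
  // 'repeat_count' responses carrying data_offset, data_offset + 1, ...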
for (int i = 0; i < request_count; i++) {
std::string id(std::to_string(i));
if (repeat_count == 0) {
auto it = result_map.find(id);
if (it != result_map.end()) {
std::cerr << "received unexpected response for request id " << id
<< std::endl;
exit(1);
}
} else {
int32_t expected_output = data_offset;
auto it = result_map.find(id);
if (it == result_map.end()) {
std::cerr << "response for request id " << id << " not received"
<< std::endl;
exit(1);
}
if (it->second.size() != (uint32_t)repeat_count) {
std::cerr << "expected " << repeat_count << " many responses, got "
<< it->second.size() << std::endl;
exit(1);
}
for (auto this_result : it->second) {
int32_t* output_data;
size_t output_byte_size;
FAIL_IF_ERR(
this_result->RawData(
"OUT", (const uint8_t**)&output_data, &output_byte_size),
"unable to get result data for 'OUT'");
if (output_byte_size != 4) {
std::cerr << "error: received incorrect byte size for 'OUT': "
<< output_byte_size << std::endl;
exit(1);
}
if (*output_data != expected_output) {
std::cerr << "error: incorrect result returned, expected "
<< expected_output << ", got " << *output_data << std::endl;
exit(1);
}
expected_output++;
}
}
}
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
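// Example invocation (the binary name depends on how this example is built;
// "<health_client>" is a placeholder), assuming a Triton server with the
// "simple" model reachable over gRPC on localhost:8001:
//   <health_client> -v -u localhost:8001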
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
bool live;
FAIL_IF_ERR(
client->IsServerLive(&live, http_headers),
"unable to get server liveness");
if (!live) {
std::cerr << "error: server is not live" << std::endl;
exit(1);
}
bool ready;
FAIL_IF_ERR(
client->IsServerReady(&ready, http_headers),
"unable to get server readiness");
if (!ready) {
std::cerr << "error: server is not live" << std::endl;
exit(1);
}
bool model_ready;
FAIL_IF_ERR(
client->IsModelReady(
&model_ready, model_name, model_version, http_headers),
"unable to get model readiness");
if (!model_ready) {
std::cerr << "error: model " << model_name << " is not live" << std::endl;
exit(1);
}
inference::ServerMetadataResponse server_metadata;
FAIL_IF_ERR(
client->ServerMetadata(&server_metadata, http_headers),
"unable to get server metadata");
if (server_metadata.name().compare("triton") != 0) {
std::cerr << "error: unexpected server metadata: "
<< server_metadata.DebugString() << std::endl;
exit(1);
}
inference::ModelMetadataResponse model_metadata;
FAIL_IF_ERR(
client->ModelMetadata(
&model_metadata, model_name, model_version, http_headers),
"unable to get model metadata");
if (model_metadata.name().compare(model_name) != 0) {
std::cerr << "error: unexpected model metadata: "
<< model_metadata.DebugString() << std::endl;
exit(1);
}
inference::ModelConfigResponse model_config;
FAIL_IF_ERR(
client->ModelConfig(
&model_config, model_name, model_version, http_headers),
"unable to get model config");
if (model_config.config().name().compare(model_name) != 0) {
std::cerr << "error: unexpected model config: "
<< model_config.DebugString() << std::endl;
exit(1);
}
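  // Negative test: asking for metadata of a model name that should not exist
  // is expected to fail, so a returned OK status is treated as an error.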
tc::Error err = client->ModelMetadata(
&model_metadata, "wrong_model_name", model_version, http_headers);
if (err.IsOk()) {
std::cerr << "error: expected an error but got: " << err << std::endl;
exit(1);
}
std::cout << err << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <getopt.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-m <model name>" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr
<< "\tFor -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-C <grpc compression algorithm>. \'deflate\', "
"\'gzip\' and \'none\' are supported"
<< std::endl;
std::cerr << "\t-c <use_cached_channel>. "
" Use cached channel when creating new client. "
" Specify 'true' or 'false'. True by default"
<< std::endl;
std::cerr << std::endl;
exit(1);
}
} // namespace
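// Example invocations (the binary name depends on how this example is built;
// "<simple_client>" and the certificate paths are placeholders):
//   <simple_client> -u localhost:8001 -C gzip
//   <simple_client> --ssl --root-certificates ca.pem \
//       --private-key client.key --certificate-chain client.pem
//   <simple_client> -c false   # run the flow twice with use_cached_channel=false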
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
uint32_t client_timeout = 0;
bool use_ssl = false;
std::string root_certificates;
std::string private_key;
std::string certificate_chain;
grpc_compression_algorithm compression_algorithm =
grpc_compression_algorithm::GRPC_COMPRESS_NONE;
bool test_use_cached_channel = false;
bool use_cached_channel = true;
// {name, has_arg, *flag, val}
static struct option long_options[] = {
{"ssl", 0, 0, 0},
{"root-certificates", 1, 0, 1},
{"private-key", 1, 0, 2},
{"certificate-chain", 1, 0, 3}};
// Parse commandline...
int opt;
while ((opt = getopt_long(argc, argv, "vu:t:H:C:c:", long_options, NULL)) !=
-1) {
switch (opt) {
case 0:
use_ssl = true;
break;
case 1:
root_certificates = optarg;
break;
case 2:
private_key = optarg;
break;
case 3:
certificate_chain = optarg;
break;
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
if (header.size() == arg.size() || header.empty()) {
Usage(
argv,
"HTTP header specified incorrectly. Must be formmated as "
"'Header:Value'");
} else {
http_headers[header] = arg.substr(header.size() + 1);
}
break;
}
case 'C': {
std::string algorithm_str{optarg};
if (algorithm_str.compare("deflate") == 0) {
compression_algorithm =
grpc_compression_algorithm::GRPC_COMPRESS_DEFLATE;
} else if (algorithm_str.compare("gzip") == 0) {
compression_algorithm =
grpc_compression_algorithm::GRPC_COMPRESS_GZIP;
} else if (algorithm_str.compare("none") == 0) {
compression_algorithm =
grpc_compression_algorithm::GRPC_COMPRESS_NONE;
} else {
Usage(
argv,
"unsupported compression algorithm specified... only "
"\'deflate\', "
"\'gzip\' and \'none\' are supported.");
}
break;
}
case 'c': {
test_use_cached_channel = true;
std::string arg = optarg;
if (arg.find("false") != std::string::npos) {
use_cached_channel = false;
} else if (arg.find("true") != std::string::npos) {
use_cached_channel = true;
} else {
Usage(argv, "need to specify true or false for use_cached_channel");
}
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
tc::SslOptions ssl_options = tc::SslOptions();
std::string err;
if (use_ssl) {
ssl_options.root_certificates = root_certificates;
ssl_options.private_key = private_key;
ssl_options.certificate_chain = certificate_chain;
err = "unable to create secure grpc client";
} else {
err = "unable to create grpc client";
}
  // When -c is given, run the whole flow twice with the same URL so that the
  // second Create() exercises the requested use_cached_channel setting.
int numRuns = test_use_cached_channel ? 2 : 1;
for (int i = 0; i < numRuns; ++i) {
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(
&client, url, verbose, use_ssl, ssl_options, tc::KeepAliveOptions(),
use_cached_channel),
err);
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
    // The inference settings. Only the model version and client timeout are
    // set explicitly; everything else uses the defaults.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(
&results, options, inputs, outputs, http_headers,
compression_algorithm),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (output1_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
tc::InferStat infer_stat;
client->ClientInferStat(&infer_stat);
std::cout << "======Client Statistics======" << std::endl;
std::cout << "completed_request_count "
<< infer_stat.completed_request_count << std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns "
<< infer_stat.cumulative_send_time_ns << std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
inference::ModelStatisticsResponse model_stat;
client->ModelInferenceStatistics(&model_stat, model_name);
std::cout << "======Model Statistics======" << std::endl;
std::cout << model_stat.DebugString() << std::endl;
std::cout << "PASS : Infer" << std::endl;
}
return 0;
}
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <getopt.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-m <model name>" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << "\t--grpc-keepalive-time <milliseconds>" << std::endl;
std::cerr << "\t--grpc-keepalive-timeout <milliseconds>" << std::endl;
std::cerr << "\t--grpc-keepalive-permit-without-calls" << std::endl;
std::cerr << "\t--grpc-http2-max-pings-without-data <number of pings>"
<< std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
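// Example invocation (the binary name depends on how this example is built;
// "<keepalive_client>" is a placeholder):
//   <keepalive_client> -u localhost:8001 --grpc-keepalive-time 10000 \
//       --grpc-keepalive-timeout 20000 --grpc-keepalive-permit-without-calls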
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
uint32_t client_timeout = 0;
bool use_ssl = false;
tc::SslOptions ssl_options;
tc::KeepAliveOptions keepalive_options;
// GRPC KeepAlive: https://grpc.github.io/grpc/cpp/md_doc_keepalive.html
int keepalive_time_ms = INT_MAX;
int keepalive_timeout_ms = 20000;
bool keepalive_permit_without_calls = false;
int http2_max_pings_without_data = 2;
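  // The locals above record the gRPC keepalive defaults (per the link above)
  // for reference; the command-line flags below write directly into
  // keepalive_options, so these values are not used elsewhere.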
// {name, has_arg, *flag, val}
static struct option long_options[] = {
{"grpc-keepalive-time", 1, 0, 0},
{"grpc-keepalive-timeout", 1, 0, 1},
{"grpc-keepalive-permit-without-calls", 0, 0, 2},
{"grpc-http2-max-pings-without-data", 1, 0, 3}};
// Parse commandline...
int opt;
while ((opt = getopt_long(argc, argv, "vu:t:H:C:", long_options, NULL)) !=
-1) {
switch (opt) {
case 0:
keepalive_options.keepalive_time_ms = std::stoi(optarg);
break;
case 1:
keepalive_options.keepalive_timeout_ms = std::stoi(optarg);
break;
case 2:
keepalive_options.keepalive_permit_without_calls = true;
break;
case 3:
keepalive_options.http2_max_pings_without_data = std::stoi(optarg);
break;
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(
&client, url, verbose, use_ssl, ssl_options, keepalive_options),
"unable to create grpc client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
  // The inference settings. Only the model version and client timeout are
  // set explicitly; everything else uses the defaults.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (output1_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
std::cout << "PASS : KeepAlive" << std::endl;
return 0;
}