Commit c68e1835 authored by lijian6

Initial commit
---
BasedOnStyle: Google
IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
#VSCode
/.vscode
src/.vscode
src/c++/.vscode
src/python/.vscode
#C++
/build
*.so
src/c++/perf_analyzer/builddir/
src/c++/perf_analyzer/.vscode/
#Python
__pycache__/
*.pyc
#Other
node_modules
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
exclude: ^src/grpc_generated
repos:
  - repo: https://github.com/timothycrosley/isort
    rev: 5.12.0
    hooks:
        - id: isort
          additional_dependencies: [toml]
  - repo: https://github.com/psf/black
    rev: 23.1.0
    hooks:
        - id: black
          types_or: [python, cython]
  - repo: https://github.com/PyCQA/flake8
    rev: 5.0.4
    hooks:
        - id: flake8
          args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore=E203,E501]
          types_or: [python, cython]
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v16.0.5
    hooks:
        - id: clang-format
          types_or: [c, c++, cuda, proto, textproto, java]
          args: ["-fallback-style=none", "-style=file", "-i"]
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
        - id: codespell
          additional_dependencies: [tomli]
          args: ["--toml", "pyproject.toml"]
          exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
  # More details about these pre-commit hooks here:
  # https://pre-commit.com/hooks.html
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
        - id: check-case-conflict
        - id: check-executables-have-shebangs
        - id: check-merge-conflict
        - id: check-json
        - id: check-toml
        - id: check-yaml
        - id: check-shebang-scripts-are-executable
        - id: end-of-file-fixer
          types_or: [c, c++, cuda, proto, textproto, java, python]
        - id: mixed-line-ending
        - id: requirements-txt-fixer
        - id: trailing-whitespace
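# A typical local workflow for these hooks (assuming pre-commit is installed,
# e.g. via `pip install pre-commit`):
#   pre-commit install          # register the hooks with git
#   pre-commit run --all-files  # run every hook against the whole tree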
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(tritonclient LANGUAGES C CXX)
#
# Options
#
set(TRITON_VERSION "0.0.0" CACHE STRING "Version for the clients")
set(PERF_ANALYZER_VERSION ${TRITON_VERSION} CACHE STRING "Build Version for Perf Analyzer")
option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF)
option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF)
option(TRITON_ENABLE_PYTHON_HTTP "Enable Python HTTP client libraries" OFF)
option(TRITON_ENABLE_PYTHON_GRPC "Enable Python GRPC client libraries" OFF)
option(TRITON_ENABLE_JAVA_HTTP "Enable JAVA HTTP client libraries" OFF)
option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
option(TRITON_ENABLE_ZLIB "Include ZLIB library in build" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/third_party repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
if(NOT TRITON_ENABLE_PYTHON_GRPC)
set(TRITON_COMMON_ENABLE_PROTOBUF_PYTHON OFF)
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-third-party
GIT_REPOSITORY https://github.com/triton-inference-server/third_party.git
GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
GIT_SHALLOW ON
)
set(TRITON_THIRD_PARTY_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/third-party)
FetchContent_MakeAvailable(repo-third-party)
# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on CentOS
set (LIB_DIR "lib")
# /etc/os-release does not exist on Windows
if(EXISTS "/etc/os-release")
file(STRINGS /etc/os-release DISTRO REGEX "^NAME=")
string(REGEX REPLACE "NAME=\"(.*)\"" "\\1" DISTRO "${DISTRO}")
message(STATUS "Distro Name: ${DISTRO}")
if(DISTRO MATCHES "CentOS.*")
set (LIB_DIR "lib64")
endif()
endif()
# Need to use ExternalProject for our builds so that we can get the
# correct dependencies between our components and the ExternalProject
# dependencies (found in the third_party repo)
include(ExternalProject)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/cc-clients/install)
else()
set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
endif()
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
if (OPENSSL_ROOT_DIR)
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
endif()
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
if (CMAKE_TOOLCHAIN_FILE)
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
endif()
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
if (VCPKG_TARGET_TRIPLET)
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
endif()
# Location where protobuf-config.cmake will be installed varies by
# platform
if (WIN32)
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
else()
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf")
endif()
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_PERF_ANALYZER_C_API)
set(_cc_client_depends "")
if(${TRITON_ENABLE_CC_HTTP})
set(_cc_client_depends ${_cc_client_depends} curl)
endif() # TRITON_ENABLE_CC_HTTP
if(${TRITON_ENABLE_CC_GRPC} OR ${TRITON_ENABLE_PERF_ANALYZER})
set(_cc_client_depends ${_cc_client_depends} grpc protobuf)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_C_API})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_C_API=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_C_API
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TFS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TFS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TFS
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS
ExternalProject_Add(cc-clients
PREFIX cc-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/c++"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/cc-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DCURL_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
-DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
-DPERF_ANALYZER_VERSION:STRING=${PERF_ANALYZER_VERSION}
-DTRITON_ENABLE_CC_HTTP:BOOL=${TRITON_ENABLE_CC_HTTP}
-DTRITON_ENABLE_CC_GRPC:BOOL=${TRITON_ENABLE_CC_GRPC}
-DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER}
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DTRITON_ENABLE_ZLIB:BOOL=${TRITON_ENABLE_ZLIB}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
DEPENDS ${_cc_client_depends}
)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC)
set(_py_client_depends "")
if(${TRITON_ENABLE_PYTHON_GRPC})
set(_py_client_depends ${_py_client_depends} grpc protobuf)
endif() # TRITON_ENABLE_PYTHON_GRPC
if(${TRITON_ENABLE_PERF_ANALYZER})
set(_py_client_depends ${_py_client_depends} cc-clients)
endif() # TRITON_ENABLE_PERF_ANALYZER
ExternalProject_Add(python-clients
PREFIX python-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/python"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/python-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
-DTRITON_ENABLE_PYTHON_HTTP:BOOL=${TRITON_ENABLE_PYTHON_HTTP}
-DTRITON_ENABLE_PYTHON_GRPC:BOOL=${TRITON_ENABLE_PYTHON_GRPC}
-DTRITON_ENABLE_PERF_ANALYZER:BOOL=${TRITON_ENABLE_PERF_ANALYZER}
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
DEPENDS ${_py_client_depends}
)
endif() # TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC
if(TRITON_ENABLE_JAVA_HTTP)
ExternalProject_Add(java-clients
PREFIX java-clients
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/java"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/java-clients"
CMAKE_CACHE_ARGS
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
-DTRITON_ENABLE_JAVA_HTTP:BOOL=${TRITON_ENABLE_JAVA_HTTP}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
INSTALL_COMMAND ""
)
endif() # TRITON_ENABLE_JAVA_HTTP
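# A minimal, illustrative configure-and-build invocation using the options
# defined above (the build directory name and the chosen feature flags are
# only an example, not a required configuration):
#
#   cmake -S . -B build \
#     -DTRITON_ENABLE_CC_HTTP=ON \
#     -DTRITON_ENABLE_CC_GRPC=ON \
#     -DTRITON_ENABLE_EXAMPLES=ON
#   cmake --build build --parallel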
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
[tool.codespell]
# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
# this is only to allow you to run codespell interactively
# this also overrides the grpc_generated folder, since it is generated
skip = "./.git,./.github,./src/grpc_generated"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# ignore allowed words
# ignoring atleast to avoid testing::AtLeast from getting flagged
ignore-words-list = "atleast"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# disable warnings about binary files and wrong encoding
quiet-level = 3
[tool.isort]
profile = "black"
use_parentheses = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
ensure_newline_before_comments = true
line_length = 88
balanced_wrapping = true
indent = " "
skip = ["build"]
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(cc-clients LANGUAGES C CXX)
#
# Options
#
option(TRITON_ENABLE_CC_HTTP "Build C++ HTTP client libraries" OFF)
option(TRITON_ENABLE_CC_GRPC "Build C++ GRPC client libraries" OFF)
option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
option(TRITON_USE_THIRD_PARTY "Use local version of third party libraries" ON)
option(TRITON_KEEP_TYPEINFO "Keep typeinfo symbols by disabling ldscript" OFF)
option(TRITON_ENABLE_ZLIB "Include ZLIB library in build" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-common
GIT_REPOSITORY https://github.com/triton-inference-server/common.git
GIT_TAG ${TRITON_COMMON_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/9406a60c7839052e4944ea4dbc8344762a89f9bd.zip
)
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
set(TRITON_COMMON_ENABLE_PROTOBUF ON)
set(TRITON_COMMON_ENABLE_GRPC ON)
if(TRITON_ENABLE_PERF_ANALYZER)
FetchContent_Declare(
repo-core
GIT_REPOSITORY https://github.com/triton-inference-server/core.git
GIT_TAG ${TRITON_CORE_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-core)
endif() # TRITON_ENABLE_PERF_ANALYZER
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT TRITON_ENABLE_PERF_ANALYZER AND NOT TRITON_ENABLE_CC_HTTP AND NOT TRITON_ENABLE_EXAMPLES)
set(TRITON_COMMON_ENABLE_JSON OFF)
endif()
if(TRITON_ENABLE_TESTS OR TRITON_ENABLE_PERF_ANALYZER)
FetchContent_MakeAvailable(googletest)
endif()
FetchContent_MakeAvailable(repo-common)
if(TRITON_ENABLE_TESTS)
include_directories(
${repo-common_SOURCE_DIR}/include
)
endif() # TRITON_ENABLE_TESTS
#
# CUDA
#
if(TRITON_ENABLE_GPU)
find_package(CUDAToolkit REQUIRED)
endif() # TRITON_ENABLE_GPU
#
# libcurl
#
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
find_package(CURL REQUIRED)
message(STATUS "Using curl ${CURL_VERSION}")
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER
#
# Protobuf
#
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE)
find_package(Protobuf CONFIG REQUIRED)
message(STATUS "Using protobuf ${Protobuf_VERSION}")
include_directories(${Protobuf_INCLUDE_DIRS})
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
#
# GRPC
#
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
find_package(gRPC CONFIG REQUIRED)
message(STATUS "Using gRPC ${gRPC_VERSION}")
include_directories($<TARGET_PROPERTY:gRPC::grpc,INTERFACE_INCLUDE_DIRECTORIES>)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
add_subdirectory(library)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC)
if(TRITON_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif() # TRITON_ENABLE_EXAMPLES
if(TRITON_ENABLE_TESTS)
add_subdirectory(tests)
endif() # TRITON_ENABLE_TESTS
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_PERF_ANALYZER)
add_subdirectory(perf_analyzer)
endif() # TRITON_ENABLE_PERF_ANALYZER
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required (VERSION 3.18)
if(WIN32)
message("C++ examples are not currently supported on Windows because "
"they require functionalities that are UNIX specific.")
else()
if(TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC)
#
# yolov7-tiny
#
find_package(OpenCV REQUIRED)
add_executable(
yolov7-tiny
yolov7-tiny.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
yolov7-tiny
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
yolov7-tiny
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS yolov7-tiny
RUNTIME DESTINATION bin
)
#
# resnet50
#
find_package(OpenCV REQUIRED)
add_executable(
resnet50
resnet50.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
resnet50
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
resnet50
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS resnet50
RUNTIME DESTINATION bin
)
#
# image_client
#
find_package(OpenCV REQUIRED)
add_executable(
image_client
image_client.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_include_directories(
image_client
PRIVATE ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(
image_client
PRIVATE
grpcclient_static
httpclient_static
${OpenCV_LIBS}
)
install(
TARGETS image_client
RUNTIME DESTINATION bin
)
#
# ensemble_image_client
#
add_executable(
ensemble_image_client
ensemble_image_client.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
ensemble_image_client
PRIVATE
grpcclient_static
httpclient_static
)
install(
TARGETS ensemble_image_client
RUNTIME DESTINATION bin
)
#
# reuse_infer_objects_client
#
add_executable(
reuse_infer_objects_client
reuse_infer_objects_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
reuse_infer_objects_client
PRIVATE
grpcclient_static
httpclient_static
)
install(
TARGETS reuse_infer_objects_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_CC_GRPC)
#
# simple_grpc_health_metadata
#
add_executable(simple_grpc_health_metadata simple_grpc_health_metadata.cc)
target_link_libraries(
simple_grpc_health_metadata
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_health_metadata
RUNTIME DESTINATION bin
)
#
# simple_grpc_model_control
#
add_executable(simple_grpc_model_control simple_grpc_model_control.cc)
target_link_libraries(
simple_grpc_model_control
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_model_control
RUNTIME DESTINATION bin
)
#
# simple_grpc_infer_client
#
add_executable(simple_grpc_infer_client simple_grpc_infer_client.cc)
target_link_libraries(
simple_grpc_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_keepalive_client
#
add_executable(simple_grpc_keepalive_client simple_grpc_keepalive_client.cc)
target_link_libraries(
simple_grpc_keepalive_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_keepalive_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_custom_args_client
#
add_executable(simple_grpc_custom_args_client simple_grpc_custom_args_client.cc)
target_link_libraries(
simple_grpc_custom_args_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_custom_args_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_string_infer_client
#
add_executable(simple_grpc_string_infer_client simple_grpc_string_infer_client.cc)
target_link_libraries(
simple_grpc_string_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_string_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_async_infer_client
#
add_executable(simple_grpc_async_infer_client simple_grpc_async_infer_client.cc)
target_link_libraries(
simple_grpc_async_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_async_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_sequence_stream_infer_client
#
add_executable(simple_grpc_sequence_stream_infer_client simple_grpc_sequence_stream_infer_client.cc)
target_link_libraries(
simple_grpc_sequence_stream_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_sequence_stream_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_sequence_sync_infer_client
#
add_executable(simple_grpc_sequence_sync_infer_client simple_grpc_sequence_sync_infer_client.cc)
target_link_libraries(
simple_grpc_sequence_sync_infer_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_sequence_sync_infer_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_shm_client
#
add_executable(
simple_grpc_shm_client
simple_grpc_shm_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
simple_grpc_shm_client
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_shm_client
RUNTIME DESTINATION bin
)
#
# simple_grpc_custom_repeat
#
add_executable(simple_grpc_custom_repeat simple_grpc_custom_repeat.cc)
target_link_libraries(
simple_grpc_custom_repeat
PRIVATE
grpcclient_static
)
install(
TARGETS simple_grpc_custom_repeat
RUNTIME DESTINATION bin
)
if(${TRITON_ENABLE_GPU})
#
# simple_grpc_cudashm_client
#
set(
SIMPLE_GRPC_CUDA_SHM_SRCS
simple_grpc_cudashm_client.cc
)
set(
SIMPLE_GRPC_CUDA_SHM_HDRS
)
add_executable(simple_grpc_cudashm_client ${SIMPLE_GRPC_CUDA_SHM_SRCS} ${SIMPLE_GRPC_CUDA_SHM_HDRS})
target_include_directories(simple_grpc_cudashm_client PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(
simple_grpc_cudashm_client
PRIVATE
grpcclient_static
${CUDA_LIBRARIES}
)
install(
TARGETS simple_grpc_cudashm_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_GPU
endif() # TRITON_ENABLE_CC_GRPC
if(TRITON_ENABLE_CC_HTTP)
#
# simple_http_health_metadata
#
add_executable(
simple_http_health_metadata
simple_http_health_metadata.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
simple_http_health_metadata
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_health_metadata
RUNTIME DESTINATION bin
)
#
# simple_http_model_control
#
add_executable(
simple_http_model_control
simple_http_model_control.cc
$<TARGET_OBJECTS:json-utils-library>
)
target_link_libraries(
simple_http_model_control
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_model_control
RUNTIME DESTINATION bin
)
#
# simple_http_infer_client
#
add_executable(simple_http_infer_client simple_http_infer_client.cc)
target_link_libraries(
simple_http_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_string_infer_client
#
add_executable(simple_http_string_infer_client simple_http_string_infer_client.cc)
target_link_libraries(
simple_http_string_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_string_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_async_infer_client
#
add_executable(simple_http_async_infer_client simple_http_async_infer_client.cc)
target_link_libraries(
simple_http_async_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_async_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_sequence_sync_infer_client
#
add_executable(simple_http_sequence_sync_infer_client simple_http_sequence_sync_infer_client.cc)
target_link_libraries(
simple_http_sequence_sync_infer_client
PRIVATE
httpclient_static
)
install(
TARGETS simple_http_sequence_sync_infer_client
RUNTIME DESTINATION bin
)
#
# simple_http_shm_client
#
add_executable(
simple_http_shm_client
simple_http_shm_client.cc
$<TARGET_OBJECTS:shm-utils-library>
)
target_link_libraries(
simple_http_shm_client
PRIVATE
httpclient_static
rt
)
install(
TARGETS simple_http_shm_client
RUNTIME DESTINATION bin
)
if(${TRITON_ENABLE_GPU})
#
# simple_http_cudashm_client
#
set(
SIMPLE_HTTP_CUDA_SHM_SRCS
simple_http_cudashm_client.cc
)
set(
SIMPLE_HTTP_CUDA_SHM_HDRS
)
add_executable(simple_http_cudashm_client ${SIMPLE_HTTP_CUDA_SHM_SRCS} ${SIMPLE_HTTP_CUDA_SHM_HDRS})
target_include_directories(simple_http_cudashm_client PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(
simple_http_cudashm_client
PRIVATE
httpclient_static
${CUDA_LIBRARIES}
)
install(
TARGETS simple_http_cudashm_client
RUNTIME DESTINATION bin
)
endif() # TRITON_ENABLE_GPU
endif() # TRITON_ENABLE_CC_HTTP
endif() # WIN32
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const size_t topk)
{
std::string output_name("OUTPUT");
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
// Get and validate the shape and datatype
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
// Validate shape
if ((shape.size() != 2) || (shape[0] != (int)batch_size) ||
(shape[1] != (int)topk)) {
std::cerr << "received incorrect shapes for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "received incorrect datatype for " << output_name << ": "
<< datatype << std::endl;
exit(1);
}
std::vector<std::string> result_data;
err = result->StringData(output_name, &result_data);
if (!err.IsOk()) {
std::cerr << "unable to get data for " << output_name << std::endl;
exit(1);
}
if (result_data.size() != (topk * batch_size)) {
std::cerr << "unexpected number of strings in the result, expected "
<< (topk * batch_size) << ", got " << result_data.size()
<< std::endl;
exit(1);
}
size_t index = 0;
for (size_t b = 0; b < batch_size; ++b) {
std::cout << "Image '" << filenames[b] << "':" << std::endl;
for (size_t c = 0; c < topk; ++c) {
std::istringstream is(result_data[index]);
int count = 0;
std::string token;
while (getline(is, token, ':')) {
if (count == 0) {
std::cout << " " << token;
} else if (count == 1) {
std::cout << " (" << token << ")";
} else if (count == 2) {
std::cout << " = " << token;
}
count++;
}
std::cout << std::endl;
index++;
}
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0]
<< " [options] <image filename / image folder>" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-c <topk>" << std::endl;
std::cerr << "\t-i <Protocol used to communicate with inference service>"
<< std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -c, the <topk> classes will be returned, default is 1."
<< std::endl;
std::cerr
<< "For -i, available protocols are 'grpc' and 'http'. Default is 'http'."
<< std::endl;
exit(1);
}
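// Only one of the two protocol clients is actually created at run time; the
// union keeps a single handle whether HTTP or gRPC is selected via '-i'.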
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
std::string protocol = "http";
size_t topk = 1;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vi:u:p:c:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'i':
protocol = optarg;
break;
case 'u':
url = optarg;
break;
case 'c':
topk = std::atoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
if (topk <= 0) {
Usage(argv, "topk must be > 0");
}
// The ensemble model takes 1 input tensor with shape [ 1 ] and STRING
// data type and returns 1 output tensor as top k (see '-c' flag)
// classification result of the input.
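// For example, a hypothetical invocation that requests the top-3 classes over
// the default HTTP endpoint (localhost:8000):
//   ./ensemble_image_client -c 3 /path/to/image_or_folder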
std::string model_name = "preprocess_inception_ensemble";
// Create the inference client for the model.
TritonClient triton_client;
tc::Error err;
if (protocol == "http") {
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
} else {
err = tc::InferenceServerGrpcClient::Create(
&triton_client.grpc_client_, url, verbose);
}
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err
<< std::endl;
exit(1);
}
if (optind >= argc) {
Usage(argv, "image file or image folder must be specified");
}
if (!err.IsOk()) {
std::cerr << "error: unable to create inference context: " << err
<< std::endl;
exit(1);
}
// Obtain a list of the image names to be processed
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(argv[optind], &name_stat) != 0) {
std::cerr << "Failed to find '" << std::string(argv[optind])
<< "': " << strerror(errno) << std::endl;
exit(1);
}
if (name_stat.st_mode & S_IFDIR) {
const std::string dirname = argv[optind];
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(argv[optind]);
}
// Sort the filenames so that we always visit them in the same order
// (readdir does not guarantee any particular order).
std::sort(image_filenames.begin(), image_filenames.end());
// Read the raw image as string
std::vector<std::vector<std::string>> images;
for (const auto& fn : image_filenames) {
images.emplace_back();
auto& image_str = images.back();
std::ifstream file(fn);
file >> std::noskipws;
image_str.emplace_back(
(std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
if (image_str.back().empty()) {
std::cerr << "error: unable to read image file " << fn << std::endl;
exit(1);
}
}
// This client only sends one request for simplicity, so the maximum number
// of images that can be processed is limited by the model's maximum batch size.
size_t batch_size = 0;
if (protocol == "http") {
std::string model_config;
err = triton_client.http_client_->ModelConfig(&model_config, model_name);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
const auto bs_itr = model_config_json.FindMember("max_batch_size");
if (bs_itr != model_config_json.MemberEnd()) {
batch_size = bs_itr->value.GetInt();
}
} else {
inference::ModelConfigResponse model_config;
err = triton_client.grpc_client_->ModelConfig(&model_config, model_name);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
batch_size = model_config.config().max_batch_size();
}
if (images.size() > batch_size) {
std::cerr << "The number of images exceeds maximum batch size, only the"
<< " first " << batch_size << " images, sorted by name"
<< " alphabetically, will be processed" << std::endl;
}
batch_size = (images.size() < batch_size) ? images.size() : batch_size;
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{(int64_t)batch_size, 1};
err = tc::InferInput::Create(&input, "INPUT", shape, "BYTES");
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
// Set the number of classification expected
err = tc::InferRequestedOutput::Create(&output, "OUTPUT", topk);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
FAIL_IF_ERR(input_ptr->Reset(), "unable to reset INPUT");
for (size_t i = 0; i < batch_size; i++) {
FAIL_IF_ERR(
input_ptr->AppendFromString(images[i]), "unable to set data for INPUT");
}
// Send inference request to the inference server.
tc::InferResult* results;
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->Infer(&results, options, inputs, outputs),
"unable to run model");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->Infer(&results, options, inputs, outputs),
"unable to run model");
}
std::unique_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Print classification results
Postprocess(std::move(results_ptr), image_filenames, batch_size, topk);
return 0;
}
#include <dirent.h>
#include <getopt.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <algorithm>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <queue>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core/version.hpp>
#if CV_MAJOR_VERSION == 2
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#elif CV_MAJOR_VERSION >= 3
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#endif
#if CV_MAJOR_VERSION == 4
#define GET_TRANSFORMATION_CODE(x) cv::COLOR_##x
#else
#define GET_TRANSFORMATION_CODE(x) CV_##x
#endif
using namespace cv;
namespace tc = triton::client;
namespace {
enum ProtocolType { HTTP = 0, GRPC = 1 };
struct ModelInfo {
std::string output_name_;
std::string input_name_;
std::string input_datatype_;
int input_c_;
int input_h_;
int input_w_;
std::string input_format_;
int type1_;
int type3_;
int max_batch_size_;
};
std::vector<float> ComputeSoftmax(const std::vector<float>& results)
{
  // Subtract the maximum logit before exponentiating for numerical stability.
  float max_value = -std::numeric_limits<float>::max();
  for (size_t i = 0; i < results.size(); ++i) {
    if (results[i] > max_value) {
      max_value = results[i];
    }
  }
  std::vector<float> softmax_results(results.size());
  float sum = 0.0f;
  for (size_t i = 0; i < results.size(); ++i) {
    softmax_results[i] = std::exp(results[i] - max_value);
    sum += softmax_results[i];
  }
  for (size_t i = 0; i < results.size(); ++i) {
    softmax_results[i] /= sum;
  }
  return softmax_results;
}
void
Preprocess(
const std::string& filename, int img_type1, int img_type3, size_t img_channels,
const cv::Size& img_size, std::vector<uint8_t>* input_data)
{
cv::Mat img = cv::imread(filename, 1);
if (img.empty()) {
std::cerr << "error: unable to decode image " << filename << std::endl;
exit(1);
}
cv::Mat sample;
if ((img.channels() == 3) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGR2RGB));
} else {
std::cerr << "unexpected number of channels " << img.channels()
<< " in input image, model expects " << img_channels << "."
<< std::endl;
exit(1);
}
cv::Mat sample_resized;
cv::resize(sample, sample_resized, img_size);
cv::Mat sample_type;
sample_resized.convertTo(sample_type, (img_channels == 3) ? img_type3 : img_type1);
cv::Mat sample_final;
sample_final = sample_type.mul(cv::Scalar(1/58.395, 1/57.12, 1/57.375));
sample_final = sample_final - cv::Scalar(123.675, 116.28, 103.53);
size_t img_byte_size = sample_final.total() * sample_final.elemSize();
size_t pos = 0;
input_data->resize(img_byte_size);
std::vector<cv::Mat> input_bgr_channels;
for (size_t i = 0; i < img_channels; ++i) {
input_bgr_channels.emplace_back(img_size.height, img_size.width, img_type1, &((*input_data)[pos]));
pos += input_bgr_channels.back().total() * input_bgr_channels.back().elemSize();
}
cv::split(sample_final, input_bgr_channels);
if (pos != img_byte_size) {
std::cerr << "unexpected total size of channels " << pos << ", expecting "
<< img_byte_size << std::endl;
exit(1);
}
}
void Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const std::string& output_name, const bool batching)
{
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
// Get and validate the shape and datatype
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
const uint8_t* result_data;
size_t outputCount = 0;
err = result->RawData(output_name, &result_data, &outputCount);
if (!err.IsOk()) {
std::cerr << "unable to get data for " << output_name << std::endl;
exit(1);
}
  // The raw output buffer holds FP32 logits; outputCount is its size in bytes.
  const float* pdata = reinterpret_cast<const float*>(result_data);
  const size_t num_classes = outputCount / sizeof(float);
  std::vector<float> logit(pdata, pdata + num_classes);
  std::vector<float> probs = ComputeSoftmax(logit);
  for (size_t j = 0; j < probs.size(); ++j) {
    if (probs[j] >= 0.5) {
      fprintf(stdout, "label:%zu,confidence:%.3f\n", j, probs[j]);
    }
  }
}
bool ParseType(const std::string& dtype, int* type1, int* type3)
{
if (dtype.compare("UINT8") == 0) {
*type1 = CV_8UC1;
*type3 = CV_8UC3;
} else if (dtype.compare("INT8") == 0) {
*type1 = CV_8SC1;
*type3 = CV_8SC3;
} else if (dtype.compare("UINT16") == 0) {
*type1 = CV_16UC1;
*type3 = CV_16UC3;
} else if (dtype.compare("INT16") == 0) {
*type1 = CV_16SC1;
*type3 = CV_16SC3;
} else if (dtype.compare("INT32") == 0) {
*type1 = CV_32SC1;
*type3 = CV_32SC3;
} else if (dtype.compare("FP32") == 0) {
*type1 = CV_32FC1;
*type3 = CV_32FC3;
} else if (dtype.compare("FP64") == 0) {
*type1 = CV_64FC1;
*type3 = CV_64FC3;
} else {
return false;
}
return true;
}
void ParseModelHttp(
const rapidjson::Document& model_metadata,
const rapidjson::Document& model_config, const size_t batch_size,
ModelInfo* model_info)
{
const auto& input_itr = model_metadata.FindMember("inputs");
size_t input_count = 0;
if (input_itr != model_metadata.MemberEnd()) {
input_count = input_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input, got " << input_count << std::endl;
exit(1);
}
const auto& output_itr = model_metadata.FindMember("outputs");
size_t output_count = 0;
if (output_itr != model_metadata.MemberEnd()) {
output_count = output_itr->value.Size();
}
if (output_count != 1) {
std::cerr << "expecting 1 output, got " << output_count << std::endl;
exit(1);
}
const auto& input_config_itr = model_config.FindMember("input");
input_count = 0;
if (input_config_itr != model_config.MemberEnd()) {
input_count = input_config_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input in model configuration, got " << input_count
<< std::endl;
exit(1);
}
const auto& input_metadata = *input_itr->value.Begin();
const auto& input_config = *input_config_itr->value.Begin();
const auto& output_metadata = *output_itr->value.Begin();
const auto& output_dtype_itr = output_metadata.FindMember("datatype");
if (output_dtype_itr == output_metadata.MemberEnd()) {
std::cerr << "output missing datatype in the metadata for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
auto datatype = std::string(output_dtype_itr->value.GetString(),
output_dtype_itr->value.GetStringLength());
if (datatype.compare("FP32") != 0) {
std::cerr << "expecting output datatype to be FP32, model '"
<< model_metadata["name"].GetString() << "' output type is '"
<< datatype << "'" << std::endl;
exit(1);
}
int max_batch_size = 0;
const auto bs_itr = model_config.FindMember("max_batch_size");
if (bs_itr != model_config.MemberEnd()) {
max_batch_size = bs_itr->value.GetUint();
}
model_info->max_batch_size_ = max_batch_size;
if (max_batch_size == 0) {
if (batch_size != 1) {
std::cerr << "batching not supported for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
} else {
if (batch_size > (size_t)max_batch_size) {
std::cerr << "expecting batch size <= " << max_batch_size
<< " for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
const bool input_batch_dim = (max_batch_size == 0);
const size_t expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
const auto input_shape_itr = input_metadata.FindMember("shape");
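// Note: the shape indexing below assumes the reported input shape has four
// dimensions laid out as [batch, channels, height, width] (NCHW-style).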
model_info->input_format_ = std::string(input_config["format"].GetString(), input_config["format"].GetStringLength());
model_info->output_name_ = std::string(output_metadata["name"].GetString(), output_metadata["name"].GetStringLength());
model_info->input_name_ = std::string(input_metadata["name"].GetString(), input_metadata["name"].GetStringLength());
model_info->input_datatype_ = std::string(input_metadata["datatype"].GetString(), input_metadata["datatype"].GetStringLength());
model_info->input_c_ = input_shape_itr->value[1].GetInt();
model_info->input_h_ = input_shape_itr->value[2].GetInt();
model_info->input_w_ = input_shape_itr->value[3].GetInt();
if (!ParseType(model_info->input_datatype_, &(model_info->type1_), &(model_info->type3_))) {
std::cerr << "unexpected input datatype '" << model_info->input_datatype_
<< "' for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
}  // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool async = false;
int batch_size = 1;
if (argc != 3) {
  fprintf(stderr, "Usage: %s <model_name> <image file or folder>\n", argv[0]);
  return -1;
}
std::string model_name = argv[1];
std::string fileName = argv[2];
std::string preprocess_output_filename;
std::string model_version = "";
std::string url("localhost:8000");
ProtocolType protocol = ProtocolType::HTTP;
tc::Headers http_headers;
TritonClient triton_client;
tc::Error err;
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err << std::endl;
exit(1);
}
ModelInfo model_info;
std::string model_metadata;
err = triton_client.http_client_->ModelMetadata(&model_metadata, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model metadata: " << err << std::endl;
}
rapidjson::Document model_metadata_json;
err = tc::ParseJson(&model_metadata_json, model_metadata);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model metadata: " << err
<< std::endl;
}
std::string model_config;
err = triton_client.http_client_->ModelConfig(&model_config, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
ParseModelHttp( model_metadata_json, model_config_json, batch_size, &model_info);
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(fileName.c_str(), &name_stat) != 0) {
std::cerr << "Failed to find '" << fileName << "': " << strerror(errno) << std::endl;
exit(1);
}
if (name_stat.st_mode & S_IFDIR) {
const std::string dirname = fileName;
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(fileName);
}
std::sort(image_filenames.begin(), image_filenames.end());
std::vector<std::vector<uint8_t>> image_data;
for (const auto& fn : image_filenames) {
image_data.emplace_back();
Preprocess(fn, model_info.type1_, model_info.type3_, model_info.input_c_,
cv::Size(model_info.input_w_, model_info.input_h_), &(image_data.back()));
if ((image_data.size() == 1) && !preprocess_output_filename.empty()) {
std::ofstream output_file(preprocess_output_filename);
std::ostream_iterator<uint8_t> output_iterator(output_file);
std::copy(image_data[0].begin(), image_data[0].end(), output_iterator);
}
}
std::vector<int64_t> shape;
shape.push_back(batch_size);
shape.push_back(model_info.input_c_);
shape.push_back(model_info.input_h_);
shape.push_back(model_info.input_w_);
tc::InferInput* input;
err = tc::InferInput::Create(&input, model_info.input_name_, shape, model_info.input_datatype_);
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
err = tc::InferRequestedOutput::Create(&output, model_info.output_name_);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<std::unique_ptr<tc::InferResult>> results;
std::vector<std::vector<std::string>> result_filenames;
size_t image_idx = 0;
size_t done_cnt = 0;
size_t sent_count = 0;
bool last_request = false;
std::mutex mtx;
std::condition_variable cv;
auto callback_func = [&](tc::InferResult* result)
{
{
std::lock_guard<std::mutex> lk(mtx);
results.emplace_back(result);
done_cnt++;
}
cv.notify_all();
};
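// The mutex, condition variable, and callback above would serve an
// asynchronous request path; the loop below issues synchronous Infer()
// calls, so they remain unused in this example.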
while (!last_request) {
err = input_ptr->Reset();
if (!err.IsOk()) {
std::cerr << "failed resetting input: " << err << std::endl;
exit(1);
}
std::vector<std::string> input_filenames;
for (int idx = 0; idx < batch_size; ++idx) {
input_filenames.push_back(image_filenames[image_idx]);
err = input_ptr->AppendRaw(image_data[image_idx]);
if (!err.IsOk()) {
std::cerr << "failed setting input: " << err << std::endl;
exit(1);
}
image_idx = (image_idx + 1) % image_data.size();
if (image_idx == 0) {
last_request = true;
}
}
result_filenames.emplace_back(std::move(input_filenames));
options.request_id_ = std::to_string(sent_count);
double time1 = getTickCount();
tc::InferResult* result;
if (protocol == ProtocolType::HTTP) {
err = triton_client.http_client_->Infer(
&result, options, inputs, outputs, http_headers);
} else {
err = triton_client.grpc_client_->Infer(
&result, options, inputs, outputs, http_headers);
}
if (!err.IsOk()) {
std::cerr << "failed sending synchronous infer request: " << err
<< std::endl;
exit(1);
}
results.emplace_back(result);
double time2 = getTickCount();
double elapsedTime = (time2 - time1) * 1000 / getTickFrequency();
fprintf(stdout, "inference time: %f ms\n", elapsedTime);
sent_count++;
}
for (size_t idx = 0; idx < results.size(); idx++) {
std::cout << "Request " << idx << ", batch size " << batch_size << std::endl;
Postprocess(
std::move(results[idx]), result_filenames[idx], batch_size,
model_info.output_name_, model_info.max_batch_size_ != 0);
}
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
// Union so a single object can hold either an HTTP or a gRPC client. The
// constructor activates the HTTP member with placement new; the destructor is
// intentionally empty because the example exits without destroying the active
// client.
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
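// Runs one inference with the provided inputs/outputs, either through the
// registered system shared-memory regions (use_shared_memory == true) or
// through freshly appended raw data, and checks that OUTPUT0/OUTPUT1 hold the
// element-wise sum and difference of the inputs.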
void
InferAndValidate(
const bool use_shared_memory, TritonClient& triton_client,
const std::string& protocol, const tc::InferOptions& options,
const tc::Headers& http_headers, std::vector<tc::InferInput*>& inputs,
const size_t input_byte_size,
std::vector<tc::InferRequestedOutput*>& outputs,
const size_t output_byte_size, std::vector<int*>& shm_ptrs)
{
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
int32_t* input0_data_ptr;
int32_t* input1_data_ptr;
int32_t* output0_data_ptr;
int32_t* output1_data_ptr;
FAIL_IF_ERR(inputs[0]->Reset(), "unable to reset input 'INPUT0'");
FAIL_IF_ERR(inputs[1]->Reset(), "unable to reset input 'INPUT1'");
if (use_shared_memory) {
input0_data_ptr = shm_ptrs[0];
input1_data_ptr = shm_ptrs[1];
FAIL_IF_ERR(
inputs[0]->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
inputs[1]->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
FAIL_IF_ERR(
outputs[0]->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
outputs[1]->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
} else {
input0_data_ptr = &input0_data[0];
input1_data_ptr = &input1_data[0];
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all twos. We use twos instead
// of ones in input1_data to validate whether inputs were set correctly.
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 2;
}
FAIL_IF_ERR(
inputs[0]->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for 'INPUT0'");
FAIL_IF_ERR(
inputs[1]->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for 'INPUT1'");
FAIL_IF_ERR(
outputs[0]->UnsetSharedMemory(),
"unable to unset shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
outputs[1]->UnsetSharedMemory(),
"unable to unset shared memory for 'OUTPUT1'");
}
std::vector<const tc::InferRequestedOutput*> routputs = {
outputs[0], outputs[1]};
tc::InferResult* results;
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->Infer(
&results, options, inputs, routputs, http_headers),
"unable to run model");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->Infer(
&results, options, inputs, routputs, http_headers),
"unable to run model");
}
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
if (use_shared_memory) {
std::cout << "\n\n======== SHARED_MEMORY ========\n";
output0_data_ptr = shm_ptrs[2];
output1_data_ptr = shm_ptrs[3];
} else {
std::cout << "\n\n======== NO_SHARED_MEMORY ========\n";
// Get pointers to the result returned...
size_t recv_output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data_ptr,
&recv_output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (recv_output0_byte_size != output_byte_size) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< recv_output0_byte_size << std::endl;
exit(1);
}
size_t recv_output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data_ptr,
&recv_output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (recv_output1_byte_size != output_byte_size) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< recv_output1_byte_size << std::endl;
exit(1);
}
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data_ptr[i] << " + " << input1_data_ptr[i] << " = "
<< output0_data_ptr[i] << std::endl;
std::cout << input0_data_ptr[i] << " - " << input1_data_ptr[i] << " = "
<< output1_data_ptr[i] << std::endl;
if ((input0_data_ptr[i] + input1_data_ptr[i]) != output0_data_ptr[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data_ptr[i] - input1_data_ptr[i]) != output1_data_ptr[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
std::cout << "\n======== END ========\n\n";
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
// Tests whether the same InferInput and InferRequestedOutput objects can be
// successfully used repeatedly for different inferences using/not-using
// shared memory.
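// A minimal usage sketch (the binary name below is only an assumption about
// how the build names this example; a running Triton server with the "simple"
// model loaded is expected):
//   ./reuse_infer_objects_client                 # HTTP, localhost:8000
//   ./reuse_infer_objects_client -i grpc         # gRPC, localhost:8001
//   ./reuse_infer_objects_client -v -u <host:port>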
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
bool url_specified = false;
tc::Headers http_headers;
std::string protocol("http");
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:i:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
url_specified = true;
break;
case 'i':
protocol = optarg;
std::transform(
protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
// Create the inference client for the server. From it
// extract and validate that the model meets the requirements for
// image classification.
TritonClient triton_client;
tc::Error err;
if (protocol == "http") {
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
} else if (protocol == "grpc") {
if (!url_specified) {
url = "localhost:8001";
}
err = tc::InferenceServerGrpcClient::Create(
&triton_client.grpc_client_, url, verbose);
} else {
std::cerr
<< "error: unsupported protocol provided: only supports grpc or http."
<< std::endl;
exit(1);
}
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err
<< std::endl;
exit(1);
}
// Unregistering all shared memory regions for a clean
// start.
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
triton_client.http_client_->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
}
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
// integers and Input1 to all ones.
std::string shm_key = "/input_simple";
int shm_fd_ip, *input0_shm;
FAIL_IF_ERR(
tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
"");
FAIL_IF_ERR(
tc::MapSharedMemory(
shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
"");
FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_ip), "");
int* input1_shm = (int*)(input0_shm + 16);
for (size_t i = 0; i < 16; ++i) {
*(input0_shm + i) = i;
*(input1_shm + i) = 1;
}
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
}
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in Shared Memory
shm_key = "/output_simple";
int shm_fd_op;
int* output0_shm;
FAIL_IF_ERR(
tc::CreateSharedMemoryRegion(shm_key, output_byte_size * 2, &shm_fd_op),
"");
FAIL_IF_ERR(
tc::MapSharedMemory(
shm_fd_op, 0, output_byte_size * 2, (void**)&output0_shm),
"");
FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_op), "");
int* output1_shm = (int*)(output0_shm + 16);
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
}
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
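// Pointer order matters: InferAndValidate() reads shm_ptrs[0..3] as
// {INPUT0, INPUT1, OUTPUT0, OUTPUT1} when shared memory is enabled.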
std::vector<int*> shm_ptrs = {
input0_shm, input1_shm, output0_shm, output1_shm};
// The inference settings. Will be using default for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
// Issue inference using shared memory
InferAndValidate(
true /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Issue inference without using shared memory
InferAndValidate(
false /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Issue inference using shared memory
InferAndValidate(
true /* use_shared_memory */, triton_client, protocol, options,
http_headers, inputs, input_byte_size, outputs, output_byte_size,
shm_ptrs);
// Unregister shared memory
if (protocol == "http") {
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
triton_client.http_client_->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
} else {
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
triton_client.grpc_client_->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
}
// Cleanup shared memory
FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
FAIL_IF_ERR(tc::UnmapSharedMemory(output0_shm, output_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/output_simple"), "");
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
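// Checks that the response from the "simple" model is well formed and that
// OUTPUT0/OUTPUT1 hold the element-wise sum and difference of the two inputs.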
void
ValidateResult(
const std::shared_ptr<tc::InferResult> result,
std::vector<int32_t>& input0_data, std::vector<int32_t>& input1_data)
{
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", result);
ValidateShapeAndDatatype("OUTPUT1", result);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
if (output1_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
<< output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << result->DebugString() << std::endl;
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
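// This example issues asynchronous requests with AsyncInfer() and waits for
// the callbacks on a condition variable. A minimal invocation sketch (the
// binary name is an assumption; a running Triton server with the "simple"
// model loaded is expected):
//   ./simple_grpc_async_infer_client -u localhost:8001 -v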
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
uint32_t client_timeout = 0;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:t:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
// Create a InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// The inference settings. Will be using default for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
// Send inference request to the inference server.
std::mutex mtx;
std::condition_variable cv;
size_t repeat_cnt = 2;
size_t done_cnt = 0;
for (size_t i = 0; i < repeat_cnt; i++) {
FAIL_IF_ERR(
client->AsyncInfer(
[&, i](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
std::lock_guard<std::mutex> lk(mtx);
std::cout << "Callback no." << i << " is called" << std::endl;
done_cnt++;
if (result_ptr->RequestStatus().IsOk()) {
ValidateResult(result_ptr, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_ptr->RequestStatus() << std::endl;
exit(1);
}
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
}
// Wait until all callbacks are invoked
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() { return done_cnt >= repeat_cnt; });
}
if (done_cnt == repeat_cnt) {
std::cout << "All done" << std::endl;
} else {
std::cerr << "Done cnt: " << done_cnt
<< " does not match repeat cnt: " << repeat_cnt << std::endl;
exit(1);
}
// Send another AsyncInfer whose callback defers the completed request
// to another thread (main thread) to handle
bool callback_invoked = false;
std::shared_ptr<tc::InferResult> result_placeholder;
FAIL_IF_ERR(
client->AsyncInfer(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
// Defer the response retrieval to main thread
std::lock_guard<std::mutex> lk(mtx);
callback_invoked = true;
result_placeholder = std::move(result_ptr);
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
// Ensure callback is completed
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() { return callback_invoked; });
}
// Get deferred response
std::cout << "Getting results from deferred response" << std::endl;
if (result_placeholder->RequestStatus().IsOk()) {
ValidateResult(result_placeholder, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_placeholder->RequestStatus() << std::endl;
exit(1);
}
tc::InferStat infer_stat;
FAIL_IF_ERR(
client->ClientInferStat(&infer_stat),
"unable to get client inference statistics");
std::cout << "completed_request_count " << infer_stat.completed_request_count
<< std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns " << infer_stat.cumulative_send_time_ns
<< std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
std::cout << "PASS : Async Infer" << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cuda_runtime_api.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
#define FAIL_IF_CUDA_ERR(FUNC) \
{ \
const cudaError_t result = FUNC; \
if (result != cudaSuccess) { \
std::cerr << "CUDA exception (line " << __LINE__ \
<< "): " << cudaGetErrorName(result) << " (" \
<< cudaGetErrorString(result) << ")" << std::endl; \
exit(1); \
} \
}
void
CreateCUDAIPCHandle(
cudaIpcMemHandle_t* cuda_handle, void* input_d_ptr, int device_id = 0)
{
// Set the GPU device to the desired GPU
FAIL_IF_CUDA_ERR(cudaSetDevice(device_id));
// Create IPC handle for data on the gpu
FAIL_IF_CUDA_ERR(cudaIpcGetMemHandle(cuda_handle, input_d_ptr));
}
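// The IPC handles produced by CreateCUDAIPCHandle() are passed to
// RegisterCudaSharedMemory() in main() below, letting the server open the same
// device allocations directly. A minimal invocation sketch (binary name
// assumed; requires a GPU and a running Triton server with the "simple"
// model):
//   ./simple_grpc_cuda_shm_client -u localhost:8001 -v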
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
// Create a InferenceServerGrpcClient instance to communicate with the
// server using gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in CUDA Shared Memory. Initialize Input0 to
// unique integers and Input1 to all ones.
int input_data[32];
for (size_t i = 0; i < 16; ++i) {
input_data[i] = i;
input_data[16 + i] = 1;
}
// copy INPUT0 and INPUT1 data in GPU shared memory
int* input_d_ptr;
FAIL_IF_CUDA_ERR(cudaMalloc((void**)&input_d_ptr, input_byte_size * 2));
FAIL_IF_CUDA_ERR(cudaMemcpy(
(void*)input_d_ptr, (void*)input_data, input_byte_size * 2,
cudaMemcpyHostToDevice));
cudaIpcMemHandle_t input_cuda_handle;
CreateCUDAIPCHandle(&input_cuda_handle, (void*)input_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"input_data", input_cuda_handle, 0 /* device_id */,
input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in CUDA Shared Memory
int *output0_d_ptr, *output1_d_ptr;
FAIL_IF_CUDA_ERR(cudaMalloc((void**)&output0_d_ptr, output_byte_size * 2));
output1_d_ptr = (int*)output0_d_ptr + 16;
cudaIpcMemHandle_t output_cuda_handle;
CreateCUDAIPCHandle(&output_cuda_handle, (void*)output0_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"output_data", output_cuda_handle, 0 /* device_id */,
output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
// The inference settings. Will be using default for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Copy input and output data back to the CPU
int output0_data[16], output1_data[16];
FAIL_IF_CUDA_ERR(cudaMemcpy(
output0_data, output0_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
FAIL_IF_CUDA_ERR(cudaMemcpy(
output1_data, output1_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
for (size_t i = 0; i < 16; ++i) {
std::cout << input_data[i] << " + " << input_data[16 + i] << " = "
<< output0_data[i] << std::endl;
std::cout << input_data[i] << " - " << input_data[16 + i] << " = "
<< output1_data[i] << std::endl;
if ((input_data[i] + input_data[16 + i]) != output0_data[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input_data[i] - input_data[16 + i]) != output1_data[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get shared memory regions active/registered within triton
inference::CudaSharedMemoryStatusResponse status;
FAIL_IF_ERR(
client->CudaSharedMemoryStatus(&status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << status.DebugString() << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Free GPU memory
FAIL_IF_CUDA_ERR(cudaFree(input_d_ptr));
FAIL_IF_CUDA_ERR(cudaFree(output0_d_ptr));
std::cout << "PASS : Cuda Shared Memory " << std::endl;
return 0;
}