Unverified Commit 70e6ab26 authored by lvhan028, committed by GitHub

Change target tritonfastertransformerbackend to tritonturbomindbackend (#36)

* change target tritonfastertransformerbackend to tritonturbomindbackend

* install targets to backends/turbomind

* change model_dir
parent 35d64462
@@ -374,8 +374,8 @@ install(
     transformer-shared
   EXPORT
     transformer-shared-targets
-  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/fastertransformer
-  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/fastertransformer
+  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
 )
 install(
@@ -100,7 +100,7 @@ Run one of the following commands to serve a LLaMA model on NVIDIA GPU server:
 ```shell
 python3 lmdeploy/serve/turbomind/deploy.py llama-7B /path/to/llama-7b llama \
     --tokenizer_path /path/to/tokenizer/model
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -111,7 +111,7 @@ bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
 ```shell
 python3 lmdeploy/serve/turbomind/deploy.py llama-13B /path/to/llama-13b llama \
     --tokenizer_path /path/to/tokenizer/model --tp 2
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -129,7 +129,7 @@ python3 -m fastchat.model.apply_delta \
     --delta-path lmsys/vicuna-7b-delta-v1.1
 python3 lmdeploy/serve/turbomind/deploy.py vicuna-7B /path/to/vicuna-7b hf
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -145,7 +145,7 @@ python3 -m fastchat.model.apply_delta \
     --delta-path lmsys/vicuna-13b-delta-v1.1
 python3 lmdeploy/serve/turbomind/deploy.py vicuna-13B /path/to/vicuna-13b hf
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -98,7 +98,7 @@ make -j$(nproc) && make install
 ```shell
 python3 lmdeploy/serve/turbomind/deploy.py llama-7B /path/to/llama-7b llama \
     --tokenizer_path /path/to/tokenizer/model
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -109,7 +109,7 @@ bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
 ```shell
 python3 lmdeploy/serve/turbomind/deploy.py llama-13B /path/to/llama-13b llama \
     --tokenizer_path /path/to/tokenizer/model --tp 2
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -127,7 +127,7 @@ python3 -m fastchat.model.apply_delta \
     --delta-path lmsys/vicuna-7b-delta-v1.1
 python3 lmdeploy/serve/turbomind/deploy.py vicuna-7B /path/to/vicuna-7b hf
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -143,7 +143,7 @@ python3 -m fastchat.model.apply_delta \
     --delta-path lmsys/vicuna-13b-delta-v1.1
 python3 lmdeploy/serve/turbomind/deploy.py vicuna-13B /path/to/vicuna-13b hf
-bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/fastertransformer
+bash workspace/service_docker_up.sh --lib-dir $(pwd)/build/install/backends/turbomind
 ```
 </details>
@@ -2,8 +2,8 @@
 data_type=fp16
 enable_custom_all_reduce=0
 pipeline_para_size=1
-tensor_para_size=8
-model_dir=/shared_data/chatpjlm-0/v0.2.3/fastertransformer/weights/
+tensor_para_size=1
+model_dir=/workspace/models/triton_models/weights/
 [request]
@@ -227,7 +227,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
     del ckpt
     for name, param in model_params.items():
-        # transpose all weights as FasterTransformer is expecting column-major
+        # transpose all weights as TurboMind is expecting column-major
         # weights: (output_dims, input_dims) -> (input_dims, output_dims)
         key = name.split('.')[-2]
         if key in ['w1', 'w3', 'wq', 'wk', 'wv', 'w2', 'wo']:
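The renamed comment above describes the layout change deploy.py applies: the projection weights stored as (output_dims, input_dims) are flipped to (input_dims, output_dims) before export. As a minimal illustrative sketch only (assuming the parameters are PyTorch tensors, as elsewhere in deploy.py):

```python
import torch

def to_turbomind_layout(name: str, param: torch.Tensor) -> torch.Tensor:
    """Illustrative sketch: transpose the projection weights listed in deploy.py
    from (output_dims, input_dims) to the column-major (input_dims, output_dims)
    layout the engine expects; all other parameters keep their original shape."""
    key = name.split('.')[-2]
    if key in ['w1', 'w3', 'wq', 'wk', 'wv', 'w2', 'wo']:
        return param.t()
    return param
```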
@@ -5,7 +5,7 @@ show_help() {
     echo
     echo "Options:"
     echo "  -h, --help    Show this help message and exit"
-    echo "  --lib-dir     Specify the directory of fastertransformer libraries"
+    echo "  --lib-dir     Specify the directory of turbomind libraries"
 }
 # check if '-h' or '--help' in the arguments
@@ -64,7 +64,7 @@ for ((i = 1; i <= $#; i++)); do
 docker run \
     --gpus $DEVICES \
     --rm \
-    -v "${LIB_PATH}":/opt/tritonserver/backends/fastertransformer \
+    -v "${LIB_PATH}":/opt/tritonserver/backends/turbomind \
    -v ""${SCRIPT_ABS_DIR}"":/workspace/models \
     --shm-size 16g \
     -p 33336:22 \
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 name: "turbomind"
-backend: "fastertransformer"
+backend: "turbomind"
 default_model_filename: "weights"
 max_batch_size: 1
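Triton resolves a model's `backend` field by name: with `backend: "turbomind"` it looks for `libtriton_turbomind.so` inside a `turbomind` subdirectory of its backends directory, which matches the install destination and `OUTPUT_NAME` set by the renamed CMake target below as well as the mount point used in `service_docker_up.sh`. A small sanity-check sketch (the `/opt/tritonserver/backends` path assumes the default location used by the docker command in this commit):

```python
from pathlib import Path

# Check that the backend name in config.pbtxt lines up with the installed
# <backends>/<name>/libtriton_<name>.so layout that Triton expects to find.
backend_name = "turbomind"
lib = Path("/opt/tritonserver/backends") / backend_name / f"libtriton_{backend_name}.so"
print(f"{lib} {'found' if lib.exists() else 'missing'}")
```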
@@ -148,7 +148,7 @@ size_t curandStateGetSize()
 bool isDebug()
 {
     static const bool is_debug = [] {
-        const auto level = std::getenv("FT_DEBUG_LEVEL");
+        const auto level = std::getenv("TM_DEBUG_LEVEL");
         if (level && level == std::string("DEBUG")) {
             return true;
         }
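After this rename, the extra debug logging is keyed off `TM_DEBUG_LEVEL` rather than `FT_DEBUG_LEVEL`, and `isDebug()` only returns true when the variable equals the exact string `DEBUG`. A hedged sketch of enabling it for a server process launched from Python (the `tritonserver` command line and repository path are placeholders, not part of this commit):

```python
import os
import subprocess

# isDebug() checks getenv("TM_DEBUG_LEVEL") == "DEBUG"; any other value (e.g. "debug"
# or "1") leaves debug logging off. The launch command below is only a placeholder.
env = dict(os.environ, TM_DEBUG_LEVEL="DEBUG")
subprocess.run(["tritonserver", "--model-repository=/path/to/model_repository"], env=env)
```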
@@ -26,7 +26,7 @@
 cmake_minimum_required (VERSION 3.18)
-project(tritonfastertransformerbackend LANGUAGES C CXX)
+project(tritonturbomindbackend LANGUAGES C CXX)
 #
 # Options
@@ -89,12 +89,12 @@ endif() # TRITON_ENABLE_GPU
 configure_file(libtriton_fastertransformer.ldscript libtriton_fastertransformer.ldscript COPYONLY)
 add_library(
-  triton-fastertransformer-backend SHARED
+  triton-turbomind-backend SHARED
   libfastertransformer.cc
 )
 add_library(
-  TritonFasterTransformerBackend::triton-fastertransformer-backend ALIAS triton-fastertransformer-backend
+  TritonTurboMindBackend::triton-turbomind-backend ALIAS triton-turbomind-backend
 )
 find_package(CUDAToolkit REQUIRED)
@@ -106,13 +106,13 @@ endif()
 set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
-target_compile_definitions(triton-fastertransformer-backend
+target_compile_definitions(triton-turbomind-backend
   PUBLIC
     USE_TRITONSERVER_DATATYPE
     BUILD_MULTI_GPU)
 target_include_directories(
-  triton-fastertransformer-backend
+  triton-turbomind-backend
   PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/src
     ${TRITON_PYTORCH_INCLUDE_PATHS}
@@ -123,31 +123,31 @@ target_include_directories(
 )
 target_link_directories(
-  triton-fastertransformer-backend
+  triton-turbomind-backend
   PRIVATE
     ${CUDA_PATH}/lib64
 )
-target_compile_features(triton-fastertransformer-backend PRIVATE cxx_std_14)
+target_compile_features(triton-turbomind-backend PRIVATE cxx_std_14)
 target_compile_options(
-  triton-fastertransformer-backend PRIVATE
+  triton-turbomind-backend PRIVATE
   $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
   -Wall -Wextra -Wno-unused-parameter -Wno-type-limits >#-Werror>
 )
 if(${TRITON_ENABLE_GPU})
   target_compile_definitions(
-    triton-fastertransformer-backend
+    triton-turbomind-backend
     PRIVATE TRITON_ENABLE_GPU=1
   )
 endif() # TRITON_ENABLE_GPU
 set_target_properties(
-  triton-fastertransformer-backend
+  triton-turbomind-backend
   PROPERTIES
     POSITION_INDEPENDENT_CODE ON
-    OUTPUT_NAME triton_fastertransformer
+    OUTPUT_NAME triton_turbomind
     SKIP_BUILD_RPATH TRUE
     BUILD_WITH_INSTALL_RPATH TRUE
     INSTALL_RPATH_USE_LINK_PATH FALSE
@@ -159,7 +159,7 @@ set_target_properties(
 # Need to turn off unused-but-set-variable due to Torchvision
 # Need to turn off unknown-pragmas due to ATen OpenMP
 set_target_properties(
-  triton-fastertransformer-backend
+  triton-turbomind-backend
   PROPERTIES COMPILE_FLAGS
   "-Wno-unknown-pragmas -Wno-unused-but-set-variable"
 )
@@ -170,7 +170,7 @@ FOREACH(p ${TRITON_PYTORCH_LIB_PATHS})
 ENDFOREACH(p)
 target_link_libraries(
-  triton-fastertransformer-backend
+  triton-turbomind-backend
   PRIVATE
     triton-core-serverapi   # from repo-core
     triton-core-backendapi  # from repo-core
@@ -186,23 +186,23 @@ target_link_libraries(
 if (BUILD_MULTI_GPU)
   target_compile_definitions(
-    triton-fastertransformer-backend
+    triton-turbomind-backend
     PUBLIC
       BUILD_MULTI_GPU
   )
   target_include_directories(
-    triton-fastertransformer-backend
+    triton-turbomind-backend
     PRIVATE
       ${MPI_INCLUDE_PATH}
   )
   target_link_directories(
-    triton-fastertransformer-backend
+    triton-turbomind-backend
     PRIVATE
       ${MPI_Libraries}
       /usr/local/mpi/lib
   )
   target_link_libraries(
-    triton-fastertransformer-backend
+    triton-turbomind-backend
     PRIVATE
       ${NCCL_LIBRARIES}
       ${MPI_LIBRARIES}
@@ -211,7 +211,7 @@ endif()
 if(${TRITON_ENABLE_GPU})
   target_link_libraries(
-    triton-fastertransformer-backend
+    triton-turbomind-backend
     PRIVATE
       CUDA::cudart
   )
@@ -221,38 +221,38 @@ endif() # TRITON_ENABLE_GPU
 # Install
 #
 include(GNUInstallDirs)
-set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonFasterTransformerBackend)
+set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TurboMindBackend)
 install(
   TARGETS
-    triton-fastertransformer-backend
+    triton-turbomind-backend
   EXPORT
-    triton-fastertransformer-backend-targets
-  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/fastertransformer
-  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/fastertransformer
+    triton-turbomind-backend-targets
+  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
 )
 install(
   EXPORT
-    triton-fastertransformer-backend-targets
+    triton-turbomind-backend-targets
   FILE
-    TritonFasterTransformerBackendTargets.cmake
+    TritonTurboMindBackendTargets.cmake
   NAMESPACE
-    TritonFasterTransformerBackend::
+    TritonTurboMindBackend::
   DESTINATION
     ${INSTALL_CONFIGDIR}
 )
 include(CMakePackageConfigHelpers)
 configure_package_config_file(
-  ${CMAKE_SOURCE_DIR}/cmake/TritonFasterTransformerBackendConfig.cmake.in
-  ${CMAKE_CURRENT_BINARY_DIR}/TritonFasterTransformerBackendConfig.cmake
+  ${CMAKE_SOURCE_DIR}/cmake/TritonTurboMindBackendConfig.cmake.in
+  ${CMAKE_CURRENT_BINARY_DIR}/TritonTurboMindBackendConfig.cmake
   INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
 )
 install(
   FILES
-    ${CMAKE_CURRENT_BINARY_DIR}/TritonFasterTransformerBackendConfig.cmake
+    ${CMAKE_CURRENT_BINARY_DIR}/TritonTurboMindBackendConfig.cmake
   DESTINATION ${INSTALL_CONFIGDIR}
 )
@@ -260,12 +260,12 @@ install(
 # Export from build tree
 #
 export(
-  EXPORT triton-fastertransformer-backend-targets
-  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonFasterTransformerBackendTargets.cmake
-  NAMESPACE TritonFasterTransformerBackend::
+  EXPORT triton-turbomind-backend-targets
+  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonTurboMindBackendTargets.cmake
+  NAMESPACE TritonTurboMindBackend::
 )
-export(PACKAGE TritonFasterTransformerBackend)
+export(PACKAGE TritonTurboMindBackend)
 # Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
@@ -511,11 +511,11 @@ TRITONSERVER_Error* ModelState::AutoCompleteConfig()
         }
     }
     else {
-        // Auto-complete configuration is not supported since fastertransformer does
+        // Auto-complete configuration is not supported since turbomind does
         // not store/capture sufficient model metadata so just log error instead.
         LOG_MESSAGE(TRITONSERVER_LOG_WARN,
                     (std::string("skipping model configuration auto-complete for '") + Name()
-                     + "': not supported for fastertransformer backend")
+                     + "': not supported for turbomind backend")
                         .c_str());
     }
@@ -940,7 +940,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests, const
             request_count,
             TRITONSERVER_ErrorNew(
                 TRITONSERVER_ERROR_INTERNAL,
-                std::string("null request given to FasterTransformer backend for '" + Name() + "'").c_str()));
+                std::string("null request given to TurboMind backend for '" + Name() + "'").c_str()));
         return;
     }
@@ -1115,7 +1115,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests, const
     for (auto& response : responses) {
         if (response != nullptr) {
             LOG_IF_ERROR(TRITONBACKEND_ResponseSend(response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
-                         "failed to send FasterTransformer backend response");
+                         "failed to send TurboMind backend response");
             LOG_MESSAGE(TRITONSERVER_LOG_VERBOSE, (std::string("response is sent")).c_str());
         }
         else {
@@ -1160,7 +1160,7 @@ void streaming_callback(std::shared_ptr<std::unordered_map<std::string, Tensor>>
     if (response != nullptr) {
         LOG_MESSAGE(TRITONSERVER_LOG_VERBOSE, (std::string("start to send streaming response")).c_str());
         LOG_IF_ERROR(TRITONBACKEND_ResponseSend(response, 0, nullptr),
-                     "failed to send FasterTransformer backend response");
+                     "failed to send TurboMind backend response");
         LOG_MESSAGE(TRITONSERVER_LOG_VERBOSE, (std::string("streaming response is sent")).c_str());
     }
     else {
@@ -1358,7 +1358,7 @@ ModelInstanceState::Execute(std::vector<TRITONBACKEND_Response*>*
             responses,
             response_count,
             TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL,
-                                  ("FasterTransformer execute failure: " + std::string(ex.what())).c_str()));
+                                  ("TurboMind execute failure: " + std::string(ex.what())).c_str()));
     }
     auto output_tensors = output_tensors_list[0];
     return output_tensors;