Unverified Commit bb6f8060 authored by lvhan028's avatar lvhan028 Committed by GitHub
Browse files

install triton_example and TransformerTritonBackend to runtime and lib respectively (#39)

parent 6e58fced
......@@ -376,6 +376,7 @@ install(
transformer-shared-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
)
install(
......
......@@ -72,7 +72,7 @@ def warmup(tritonserver_addr: str,
def main(tritonserver_addr: str,
model_name: str,
concurrency: int = 1,
session_len: int = 2048,
session_len: int = 2056,
input_seqlen: int = 0,
output_seqlen: int = 512,
test_round: int = 10):
......@@ -116,7 +116,7 @@ def main(tritonserver_addr: str,
token_latency_max = np.max(stats[:, 2], axis=0)
token_latency_ave = np.mean(stats[:, 2], axis=0)
throughput = np.sum(stats[:, 1], axis=0) / np.sum(stats[:, 2], axis=0)
print(f'\n{"-" * 50}\ncocurrency: {concurrency}, input_tokens: '
print(f'\n{"-" * 50}\nconcurrency: {concurrency}, input_tokens: '
f'{input_seqlen}, output_tokens: {output_seqlen}\n'
f'elapsed_time: {elapsed_time:.2f}s\n'
f'first_token latency(min, max, ave): '
......
......@@ -4,3 +4,5 @@ add_executable(llama_triton_example llama_triton_example.cc)
target_link_libraries(llama_triton_example PUBLIC -lcublas -lcublasLt -lcudart
LlamaTritonBackend TransformerTritonBackend mpi_utils nccl_utils
nvtx_utils word_list glog)
install(TARGETS llama_triton_example DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
......@@ -328,6 +328,7 @@ class Chatbot:
f'#input tokens {input_tokens}, ' \
f'history tokens {session.sequence_length}, ' \
f'request length {request_output_len}'
logger.warning(errmsg)
yield StatusCode.TRITON_SESSION_OUT_OF_LIMIT, errmsg, 0
return
......
......@@ -143,7 +143,7 @@ def export(model_name: str,
# parameters for turbomind
max_batch_size=32,
max_context_token_num=4,
session_len=2048,
session_len=2056,
step_length=1,
cache_max_entry_count=48,
cache_chunk_size=8,
......
......@@ -284,5 +284,6 @@ export(PACKAGE TritonTurboMindBackend)
add_library(TransformerTritonBackend SHARED transformer_triton_backend.cpp)
target_link_libraries(TransformerTritonBackend PRIVATE nccl_utils mpi_utils)
install(TARGETS TransformerTritonBackend DESTINATION ${CMAKE_INSTALL_LIBDIR})
add_subdirectory(llama)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment