Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
bb6f8060
Unverified
Commit
bb6f8060
authored
Jul 03, 2023
by
lvhan028
Committed by
GitHub
Jul 03, 2023
Browse files
install triton_example and TransformerTritonBackend to runtime and lib respectively (#39)
parent
6e58fced
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
8 additions
and
3 deletions
+8
-3
CMakeLists.txt
CMakeLists.txt
+1
-0
benchmark/profile_generation.py
benchmark/profile_generation.py
+2
-2
examples/cpp/llama/CMakeLists.txt
examples/cpp/llama/CMakeLists.txt
+2
-0
lmdeploy/serve/turbomind/chatbot.py
lmdeploy/serve/turbomind/chatbot.py
+1
-0
lmdeploy/serve/turbomind/deploy.py
lmdeploy/serve/turbomind/deploy.py
+1
-1
src/turbomind/triton_backend/CMakeLists.txt
src/turbomind/triton_backend/CMakeLists.txt
+1
-0
No files found.
CMakeLists.txt
View file @
bb6f8060
...
@@ -376,6 +376,7 @@ install(
...
@@ -376,6 +376,7 @@ install(
transformer-shared-targets
transformer-shared-targets
LIBRARY DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/backends/turbomind
LIBRARY DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/backends/turbomind
ARCHIVE DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/backends/turbomind
ARCHIVE DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/backends/turbomind
RUNTIME DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/bin
)
)
install
(
install
(
...
...
benchmark/profile_generation.py
View file @
bb6f8060
...
@@ -72,7 +72,7 @@ def warmup(tritonserver_addr: str,
...
@@ -72,7 +72,7 @@ def warmup(tritonserver_addr: str,
def
main
(
tritonserver_addr
:
str
,
def
main
(
tritonserver_addr
:
str
,
model_name
:
str
,
model_name
:
str
,
concurrency
:
int
=
1
,
concurrency
:
int
=
1
,
session_len
:
int
=
20
48
,
session_len
:
int
=
20
56
,
input_seqlen
:
int
=
0
,
input_seqlen
:
int
=
0
,
output_seqlen
:
int
=
512
,
output_seqlen
:
int
=
512
,
test_round
:
int
=
10
):
test_round
:
int
=
10
):
...
@@ -116,7 +116,7 @@ def main(tritonserver_addr: str,
...
@@ -116,7 +116,7 @@ def main(tritonserver_addr: str,
token_latency_max
=
np
.
max
(
stats
[:,
2
],
axis
=
0
)
token_latency_max
=
np
.
max
(
stats
[:,
2
],
axis
=
0
)
token_latency_ave
=
np
.
mean
(
stats
[:,
2
],
axis
=
0
)
token_latency_ave
=
np
.
mean
(
stats
[:,
2
],
axis
=
0
)
throughput
=
np
.
sum
(
stats
[:,
1
],
axis
=
0
)
/
np
.
sum
(
stats
[:,
2
],
axis
=
0
)
throughput
=
np
.
sum
(
stats
[:,
1
],
axis
=
0
)
/
np
.
sum
(
stats
[:,
2
],
axis
=
0
)
print
(
f
'
\n
{
"-"
*
50
}
\n
cocurrency:
{
concurrency
}
, input_tokens: '
print
(
f
'
\n
{
"-"
*
50
}
\n
co
n
currency:
{
concurrency
}
, input_tokens: '
f
'
{
input_seqlen
}
, output_tokens:
{
output_seqlen
}
\n
'
f
'
{
input_seqlen
}
, output_tokens:
{
output_seqlen
}
\n
'
f
'elapsed_time:
{
elapsed_time
:.
2
f
}
s
\n
'
f
'elapsed_time:
{
elapsed_time
:.
2
f
}
s
\n
'
f
'first_token latency(min, max, ave): '
f
'first_token latency(min, max, ave): '
...
...
examples/cpp/llama/CMakeLists.txt
View file @
bb6f8060
...
@@ -4,3 +4,5 @@ add_executable(llama_triton_example llama_triton_example.cc)
...
@@ -4,3 +4,5 @@ add_executable(llama_triton_example llama_triton_example.cc)
target_link_libraries
(
llama_triton_example PUBLIC -lcublas -lcublasLt -lcudart
target_link_libraries
(
llama_triton_example PUBLIC -lcublas -lcublasLt -lcudart
LlamaTritonBackend TransformerTritonBackend mpi_utils nccl_utils
LlamaTritonBackend TransformerTritonBackend mpi_utils nccl_utils
nvtx_utils word_list glog
)
nvtx_utils word_list glog
)
install
(
TARGETS llama_triton_example DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/bin
)
lmdeploy/serve/turbomind/chatbot.py
View file @
bb6f8060
...
@@ -328,6 +328,7 @@ class Chatbot:
...
@@ -328,6 +328,7 @@ class Chatbot:
f
'#input tokens
{
input_tokens
}
, '
\
f
'#input tokens
{
input_tokens
}
, '
\
f
'history tokens
{
session
.
sequence_length
}
, '
\
f
'history tokens
{
session
.
sequence_length
}
, '
\
f
'request length
{
request_output_len
}
'
f
'request length
{
request_output_len
}
'
logger
.
warning
(
errmsg
)
yield
StatusCode
.
TRITON_SESSION_OUT_OF_LIMIT
,
errmsg
,
0
yield
StatusCode
.
TRITON_SESSION_OUT_OF_LIMIT
,
errmsg
,
0
return
return
...
...
lmdeploy/serve/turbomind/deploy.py
View file @
bb6f8060
...
@@ -143,7 +143,7 @@ def export(model_name: str,
...
@@ -143,7 +143,7 @@ def export(model_name: str,
# parameters for turbomind
# parameters for turbomind
max_batch_size
=
32
,
max_batch_size
=
32
,
max_context_token_num
=
4
,
max_context_token_num
=
4
,
session_len
=
20
48
,
session_len
=
20
56
,
step_length
=
1
,
step_length
=
1
,
cache_max_entry_count
=
48
,
cache_max_entry_count
=
48
,
cache_chunk_size
=
8
,
cache_chunk_size
=
8
,
...
...
src/turbomind/triton_backend/CMakeLists.txt
View file @
bb6f8060
...
@@ -284,5 +284,6 @@ export(PACKAGE TritonTurboMindBackend)
...
@@ -284,5 +284,6 @@ export(PACKAGE TritonTurboMindBackend)
add_library
(
TransformerTritonBackend SHARED transformer_triton_backend.cpp
)
add_library
(
TransformerTritonBackend SHARED transformer_triton_backend.cpp
)
target_link_libraries
(
TransformerTritonBackend PRIVATE nccl_utils mpi_utils
)
target_link_libraries
(
TransformerTritonBackend PRIVATE nccl_utils mpi_utils
)
install
(
TARGETS TransformerTritonBackend DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
)
add_subdirectory
(
llama
)
add_subdirectory
(
llama
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment