OpenDAS / Lmdeploy · Commits

Commit d7117b95, authored Mar 22, 2024 by zhouxiang
Parent: 5f83e392

Sync 0.2.6 code
Changes: 151 files in total; showing 11 changed files with 124 additions and 309 deletions (+124 / -309).
src/turbomind/python/CMakeLists.txt                      +9   -2
src/turbomind/triton_backend/llama/LlamaTritonModel.cc   +11  -13
src/turbomind/utils/CMakeLists.txt                       +2   -1
tests/csrc/unittests/test_penalty_kernels.cu             +5   -0
tests/pytorch/test_decode.py                             +0   -68
tests/pytorch/test_dist.py                               +0   -51
tests/pytorch/test_model.py                              +0   -19
tests/pytorch/test_utils.py                              +0   -44
tests/test_lmdeploy/test_cli.py                          +0   -51
tests/test_lmdeploy/test_model.py                        +51  -48
tests/test_lmdeploy/test_tokenizer.py                    +46  -12
src/turbomind/python/CMakeLists.txt
...
...
@@ -17,6 +17,13 @@ target_link_libraries(${PROJECT_NAME} PRIVATE TransformerTritonBackend
                                               LlamaTritonBackend)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_14)
set(_INSTALL_CUDA_RPATH
    "\$ORIGIN"
    "\$ORIGIN/../../nvidia/nccl/lib/"
    "\$ORIGIN/../../nvidia/cuda_runtime/lib/"
    "\$ORIGIN/../../nvidia/cublas/lib/"
)
set_target_properties(${PROJECT_NAME} PROPERTIES
    BUILD_RPATH "\$ORIGIN"
    INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../nvidia/nccl/lib/")
    BUILD_RPATH "\$ORIGIN"
    INSTALL_RPATH "${_INSTALL_CUDA_RPATH}")
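This hunk replaces the hard-coded INSTALL_RPATH with the new _INSTALL_CUDA_RPATH list, apparently so the extension can locate the pip-installed nvidia NCCL, CUDA runtime, and cuBLAS libraries relative to itself. As a rough way to verify the result on Linux (a hedged sketch; the path to the built shared object is an assumption, not taken from this commit):

# Hypothetical check: print the RPATH/RUNPATH entries of the built extension (Linux only).
import subprocess

so_path = 'lmdeploy/lib/_turbomind.cpython-310-x86_64-linux-gnu.so'  # assumed path
dynamic = subprocess.run(['readelf', '-d', so_path],
                         capture_output=True, text=True, check=True).stdout
for line in dynamic.splitlines():
    if 'RPATH' in line or 'RUNPATH' in line:
        print(line.strip())  # expect $ORIGIN plus the nvidia/*/lib entries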
src/turbomind/triton_backend/llama/LlamaTritonModel.cc
...
...
@@ -47,7 +47,6 @@ std::shared_ptr<AbstractTransformerModel> AbstractTransformerModel::createLlamaM
            reader.GetInteger("ft_instance_hyperparameter", "enable_custom_all_reduce", 0),
            model_dir);
    }
#ifdef ENABLE_BF16
    else if (data_type == "bf16") {
#ifdef ENABLE_BF16
        return std::make_shared<LlamaTritonModel<__nv_bfloat16>>(
...
...
@@ -60,7 +59,6 @@ std::shared_ptr<AbstractTransformerModel> AbstractTransformerModel::createLlamaM
        ft::FT_CHECK(false);
#endif
    }
#endif
    else {
        return std::make_shared<LlamaTritonModel<float>>(
            reader.GetInteger("ft_instance_hyperparameter", "tensor_para_size"),
...
...
@@ -146,7 +144,17 @@ LlamaTritonModel<T>::LlamaTritonModel(size_t tensor_para_size,
    enable_custom_all_reduce_(enable_custom_all_reduce)
{
    INIReader reader;
    FT_CHECK_WITH_INFO((config.empty() ^ model_dir.empty()), "invalid init options");
    FT_CHECK_WITH_INFO(!(config.empty() && model_dir.empty()), "invalid init options");
    if (!model_dir.empty()) {
        model_dir_ = model_dir;
        const std::string inifile{model_dir + "/config.ini"};
        reader = INIReader(inifile);
        if (reader.ParseError() < 0) {
            TM_LOG_ERROR("[ERROR] Can't load %s", inifile.c_str());
            ft::FT_CHECK(false);
        }
    }
    if (!config.empty()) {
        std::FILE* tmpf = std::tmpfile();
...
...
@@ -159,16 +167,6 @@ LlamaTritonModel<T>::LlamaTritonModel(size_t tensor_para_size,
        }
    }
    if (!model_dir.empty()) {
        model_dir_ = model_dir;
        const std::string inifile{model_dir + "/config.ini"};
        reader = INIReader(inifile);
        if (reader.ParseError() < 0) {
            TM_LOG_ERROR("[ERROR] Can't load %s", inifile.c_str());
            ft::FT_CHECK(false);
        }
    }
    model_name_  = reader.Get("llama", "model_name");
    head_num_    = reader.GetInteger("llama", "head_num");
    kv_head_num_ = reader.GetInteger("llama", "kv_head_num", 0);
...
...
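In the constructor, the old check required exactly one of config/model_dir to be set (an XOR on the two empty() results), while the new check only requires at least one, and the config.ini parsing moves into the `!model_dir.empty()` branch. A small hedged sketch of the same validation logic in Python (names are illustrative, not part of lmdeploy):

# Illustrative only: mirrors the constructor's option validation, not lmdeploy API.
def validate_init_options(config: str, model_dir: str) -> None:
    # Old rule: exactly one source must be provided (XOR).
    old_ok = (config == '') ^ (model_dir == '')
    # New rule: at least one source must be provided.
    new_ok = not (config == '' and model_dir == '')
    assert new_ok, 'invalid init options'
    print(f'old rule satisfied: {old_ok}, new rule satisfied: {new_ok}')

validate_init_options(config='', model_dir='./workspace')               # both rules pass
validate_init_options(config='some config text', model_dir='./workspace')  # only the new rule passes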
src/turbomind/utils/CMakeLists.txt
...
...
@@ -67,7 +67,8 @@ add_library(mpi_utils STATIC mpi_utils.cc)
#set_property(TARGET mpi_utils PROPERTY POSITION_INDEPENDENT_CODE ON)
#set_property(TARGET mpi_utils PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
if (BUILD_MULTI_GPU)
    target_link_libraries(mpi_utils PUBLIC mpi logger)
    #target_link_libraries(mpi_utils PUBLIC mpi logger)
    target_link_libraries(mpi_utils PUBLIC ${MPI_CXX_LIBRARIES} logger)
endif()
add_library(nccl_utils STATIC nccl_utils.cc)
...
...
tests/csrc/unittests/test_penalty_kernels.cu
...
...
@@ -386,6 +386,7 @@ protected:
    T*     d_bias_;
    int*   d_output_ids_;
    int*   d_input_lengths_;
    int*   d_penalty_workspace_;
    float* d_repetition_penalties_;
...
...
@@ -410,6 +411,8 @@ protected:
        d_bias_          = reinterpret_cast<T*>(allocator->malloc(sizeof(T) * vocab_size_padded_));
        d_output_ids_    = reinterpret_cast<int*>(allocator->malloc(sizeof(int) * sequence_length_ * batch_size_));
        d_input_lengths_ = reinterpret_cast<int*>(allocator->malloc(sizeof(int) * batch_size_));
        d_penalty_workspace_ = reinterpret_cast<int*>(allocator->malloc((sizeof(int) + sizeof(float)) * batch_size_ * step_));
        cudaAutoCpy(d_logits_, h_logits_, batch_size_ * vocab_size_padded_, stream);
        cudaAutoCpy(d_bias_, h_bias_, vocab_size_padded_, stream);
...
...
@@ -501,6 +504,7 @@ public:
        else {
            invokeBatchApplyRepetitionPenalty(d_logits_,
                                              d_repetition_penalties_,
                                              d_penalty_workspace_,
                                              d_output_ids_,
                                              batch_size_,
                                              batch_size_,
...
...
@@ -559,6 +563,7 @@ public:
        cudaAutoCpy(d_logits_batch, h_logits_, batch_size_ * vocab_size_padded_, stream);
        invokeBatchApplyRepetitionPenalty(d_logits_batch,
                                          d_repetition_penalties_,
                                          d_penalty_workspace_,
                                          d_output_ids_,
                                          batch_size_,
                                          batch_size_,
...
...
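The hunks above add a d_penalty_workspace_ buffer and pass it to invokeBatchApplyRepetitionPenalty. For orientation, the sketch below shows the common CTRL-style repetition-penalty formula that such kernels apply per batch row; it is a NumPy illustration of the idea, not the CUDA kernel's source.

# Rough NumPy sketch of the standard repetition penalty (not the kernel itself).
import numpy as np

def apply_repetition_penalty(logits, output_ids, penalty):
    """logits: (vocab,); output_ids: previously generated token ids; penalty > 1 discourages repeats."""
    penalized = logits.copy()
    for token_id in set(output_ids):
        value = penalized[token_id]
        # Positive logits are divided by the penalty, negative ones multiplied by it.
        penalized[token_id] = value / penalty if value > 0 else value * penalty
    return penalized

logits = np.array([2.0, -1.0, 0.5, 3.0])
print(apply_repetition_penalty(logits, output_ids=[0, 3], penalty=1.1))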
tests/pytorch/test_decode.py
deleted (100644 → 0)
import os

import numpy as np
import torch
from transformers import AutoTokenizer

from lmdeploy.pytorch.decode import Engine, decode_single
from lmdeploy.pytorch.model import accel_model, init_model


def _test_decode_dist(model_path, prompt):
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    tokenizer.pad_token_id = tokenizer.eos_token_id
    tokenizer.padding_side = 'right'
    inputs = tokenizer(prompt)
    input_ids = inputs.input_ids
    engine = Engine(model_path, tokenizer=tokenizer)
    probs = engine.decode(input_ids, sort=False, max_bs=1, pad=True)
    return probs


def _test_decode_single(model_path, prompt):
    model, tokenizer = init_model(model_path)
    model = accel_model(model)
    model = model.eval()
    tokenizer.pad_token_id = tokenizer.eos_token_id
    tokenizer.padding_side = 'right'
    inputs = tokenizer(prompt, return_tensors='pt', padding=True)
    input_ids = inputs.input_ids.cuda()
    attention_mask = inputs.attention_mask.cuda()
    probs: torch.Tensor = decode_single(model, input_ids, attention_mask)
    return probs.numpy()


def test_compare(output_outliers=True):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    os.environ['MKL_THREADING_LAYER'] = 'GNU'
    # https://github.com/pytorch/pytorch/issues/37377#issuecomment-629529611
    model_path = 'llama2/huggingface/llama-2-7b'
    prompts = [
        'I believe the meaning of life is to find your gift. The purpose of life is to give it away.',  # noqa: E501
        'Simply put, the theory of relativity states that ',
        'Building a website can be done in 10 simple steps:'
    ]
    p_single = _test_decode_single(model_path, prompts)
    p_dist = _test_decode_dist(model_path, prompts)
    rtol = 2.0e-2
    atol = 2.0e-2
    if output_outliers:
        np.set_printoptions(linewidth=150, edgeitems=5)
        failed = (abs(p_dist - p_single) > atol + rtol * abs(p_single))
        idx = failed.nonzero()
        print(f'Num outliers: {len(idx[0])}')
        print(p_dist[idx])
        print(p_single[idx])
    assert np.allclose(p_dist, p_single, rtol=rtol, atol=atol)
tests/pytorch/test_dist.py
deleted (100644 → 0)
import unittest

import torch

from lmdeploy.pytorch.dist import (get_rank, master_only,
                                   master_only_and_broadcast_general,
                                   master_only_and_broadcast_tensor)


class SimpleTest(unittest.TestCase):

    @master_only
    def fake_input(self):
        print(f'Evaluate fake input 1 on {get_rank()}')
        return 'master only or none'

    @master_only_and_broadcast_general
    def fake_input21(self):
        print(f'Evaluate fake input 21 (str) on {get_rank()}')
        return 'master only and_broadcast'

    @master_only_and_broadcast_general
    def fake_input22(self):
        print(f'Evaluate fake input 22 (cpu tensor) on {get_rank()}')
        return torch.tensor([6, 66, 666])

    @master_only_and_broadcast_tensor
    def fake_input3(self):
        print(f'Evaluate fake input 3 (gpu tensor) on {get_rank()}')
        return torch.tensor([6, 66, 666]).cuda()

    def test(self):
        torch.distributed.init_process_group(backend='nccl')
        rank = get_rank()
        # unittest will discard --local_rank, thus set manually
        torch.cuda.set_device(rank)

        in1 = self.fake_input()
        in21 = self.fake_input21()
        in22 = self.fake_input22()
        in3 = self.fake_input3(dtype=torch.long, size=(1, 3))

        if rank == 0:
            self.assertEqual(in1, 'master only or none')
        else:
            self.assertEqual(in1, None)
        self.assertEqual(in21, 'master only and_broadcast')
        self.assertTrue(torch.allclose(in22, torch.tensor([6, 66, 666])))
        self.assertFalse(torch.allclose(in3.cpu(), torch.tensor([6, 6, 666])))
        self.assertTrue(torch.allclose(in3.cpu(), torch.tensor([6, 66, 666])))
tests/pytorch/test_model.py
deleted (100644 → 0)
from lmdeploy.pytorch.model import accel_model, init_model


def test_init_model():
    cprint = lambda x: print(f'\033[92m{x}\033[0m')  # noqa: E731

    # Test llama2-7b
    for model_path in ['llama2/huggingface/llama-2-7b', 'internlm-7b']:
        model, tokenizer = init_model(model_path)
        assert tokenizer.is_fast

        cprint('llama2 on CPU')
        print(model)

        model1 = accel_model(model)
        cprint('llama2 on GPU')
        print(model1)

        cprint('llama2 with kernel injection')
        model2 = accel_model(model, accel='deepspeed')
        assert 'DeepSpeedSelfAttention' in repr(model2)
        assert 'DeepSpeedMLP' in repr(model2)
tests/pytorch/test_utils.py
deleted (100644 → 0)
from lmdeploy.pytorch.utils import BasicStreamer, TerminalIO


def test_terminal_io(monkeypatch):
    import io
    tio = TerminalIO()

    inputs = 'hello\n\n'
    # inputs = 'hello\n\n\x1B[A\n\n'
    monkeypatch.setattr('sys.stdin', io.StringIO(inputs))

    string = tio.input()
    tio.output(string)
    assert string == 'hello'

    # string = tio.input()
    # tio.output(string)
    # assert string == 'hello'


def test_basic_streamer():
    output = []

    def decode_func(value):
        return value + 10

    def output_func(value):
        output.append(value)

    streamer = BasicStreamer(decode_func, output_func)
    for i in range(10):
        streamer.put(i)
        if i == 5:
            streamer.end()
    streamer.end()

    assert output == [11, 12, 13, 14, 15, '\n', 17, 18, 19, '\n']

    output.clear()
    streamer = BasicStreamer(decode_func, output_func, skip_prompt=False)
    for i in range(10):
        streamer.put(i)
        if i == 5:
            streamer.end()
    streamer.end()

    assert output == [10, 11, 12, 13, 14, 15, '\n', 16, 17, 18, 19, '\n']
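For reference, a minimal streamer consistent with what this deleted test asserts could look like the sketch below. It is an assumption inferred from the expected outputs (the first value after start or end() is swallowed as the prompt unless skip_prompt=False, and end() emits '\n'), not the actual lmdeploy.pytorch.utils.BasicStreamer implementation.

# Minimal sketch consistent with the deleted test's expectations; the real
# BasicStreamer in lmdeploy may differ.
class SketchStreamer:

    def __init__(self, decode_func, output_func, skip_prompt=True):
        self.decode_func = decode_func
        self.output_func = output_func
        self.skip_prompt = skip_prompt
        self.prompt_pending = skip_prompt  # first put() after start/end is the prompt

    def put(self, value):
        if self.prompt_pending:
            self.prompt_pending = False  # swallow the prompt value
            return
        self.output_func(self.decode_func(value))

    def end(self):
        self.output_func('\n')
        self.prompt_pending = self.skip_prompt  # next put() starts a new round

out = []
s = SketchStreamer(lambda v: v + 10, out.append)
for i in range(10):
    s.put(i)
    if i == 5:
        s.end()
s.end()
assert out == [11, 12, 13, 14, 15, '\n', 17, 18, 19, '\n']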
tests/test_lmdeploy/test_cli.py
deleted (100644 → 0)
import inspect


def compare_func(class_method, function):
    """Compare if a class method has same arguments as a function."""
    argspec_cls = inspect.getfullargspec(class_method)
    argspec_func = inspect.getfullargspec(function)
    assert argspec_cls.args[1:] == argspec_func.args
    assert argspec_cls.defaults == argspec_func.defaults
    assert argspec_cls.annotations == argspec_func.annotations


def test_cli():
    from lmdeploy.cli.cli import CLI
    from lmdeploy.serve.turbomind.deploy import main as convert
    compare_func(CLI.convert, convert)


def test_subcli_chat():
    from lmdeploy.cli.chat import SubCliChat
    from lmdeploy.pytorch.chat import main as run_torch_model
    from lmdeploy.turbomind.chat import main as run_turbomind_model
    compare_func(SubCliChat.torch, run_torch_model)
    compare_func(SubCliChat.turbomind, run_turbomind_model)


def test_subcli_lite():
    from lmdeploy.cli.lite import SubCliLite
    from lmdeploy.lite.apis.auto_awq import auto_awq
    from lmdeploy.lite.apis.calibrate import calibrate
    from lmdeploy.lite.apis.kv_qparams import main as run_kv_qparams
    compare_func(SubCliLite.auto_awq, auto_awq)
    compare_func(SubCliLite.calibrate, calibrate)
    compare_func(SubCliLite.kv_qparams, run_kv_qparams)


def test_subcli_serve():
    from lmdeploy.cli.serve import SubCliServe
    from lmdeploy.serve.client import main as run_triton_client
    from lmdeploy.serve.gradio.app import run as run_gradio
    from lmdeploy.serve.openai.api_client import main as run_api_client
    from lmdeploy.serve.openai.api_server import serve as run_api_server
    compare_func(SubCliServe.gradio, run_gradio)
    compare_func(SubCliServe.api_server, run_api_server)
    compare_func(SubCliServe.api_client, run_api_client)
    compare_func(SubCliServe.triton_client, run_triton_client)
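The deleted compare_func helper relied on inspect.getfullargspec to assert that each CLI method mirrors the underlying function's signature. A small self-contained example of the same comparison pattern; the Greeter class and greet function here are made up for illustration, not lmdeploy code.

# Toy example of the signature-comparison pattern used by the removed test.
import inspect

def greet(name: str, punctuation: str = '!') -> str:
    return f'Hello, {name}{punctuation}'

class Greeter:

    def greet(self, name: str, punctuation: str = '!') -> str:
        return greet(name, punctuation)

spec_cls = inspect.getfullargspec(Greeter.greet)
spec_func = inspect.getfullargspec(greet)
assert spec_cls.args[1:] == spec_func.args        # drop 'self'
assert spec_cls.defaults == spec_func.defaults
assert spec_cls.annotations == spec_func.annotations
print('signatures match')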
tests/test_lmdeploy/test_model.py
import pytest

from lmdeploy.model import MODELS, SamplingParam
from lmdeploy.model import MODELS, best_match_model


@pytest.mark.parametrize(
    'model_path_and_name',
    [('internlm/internlm-chat-7b', ['internlm']),
     ('internlm/internlm2-1_8b', ['base']),
     ('models--internlm--internlm-chat-7b/snapshots/1234567', ['internlm']),
     ('Qwen/Qwen-7B-Chat', ['qwen']),
     ('codellama/CodeLlama-7b-hf', ['codellama']),
     ('upstage/SOLAR-0-70b', ['solar', 'solar-70b']),
     ('meta-llama/Llama-2-7b-chat-hf', ['llama2']),
     ('THUDM/chatglm2-6b', ['chatglm']),
     ('01-ai/Yi-6B-200k', ['yi', 'yi-200k']),
     ('01-ai/Yi-34B-Chat', ['yi']),
     ('01-ai/Yi-6B-Chat', ['yi', 'yi-chat']),
     ('WizardLM/WizardLM-70B-V1.0', ['wizardlm']),
     ('codellama/CodeLlama-34b-Instruct-hf', ['codellama']),
     ('tiiuae/falcon-7b', ['falcon']),
     ('workspace', [None])])
@pytest.mark.parametrize('suffix', ['', '-w4', '-4bit', '-16bit'])
def test_best_match_model(model_path_and_name, suffix):
    if model_path_and_name[0] == 'internlm/internlm2-1_8b' and suffix:
        return  # internlm/internlm2-1_8b-suffix will got None
    deduced_name = best_match_model(model_path_and_name[0] + suffix)
    if deduced_name is not None:
        assert deduced_name in model_path_and_name[1], \
            f'expect {model_path_and_name[1]}, but got {deduced_name}'
    else:
        assert deduced_name in model_path_and_name[1], \
            f'expect {model_path_and_name[1]}, but got {deduced_name}'


@pytest.mark.parametrize('model_name',
                         ['llama2', 'base', 'yi', 'qwen-7b', 'vicuna'])
@pytest.mark.parametrize('meta_instruction', ['[fake meta_instruction]'])
def test_model_config(model_name, meta_instruction):
    from lmdeploy.model import ChatTemplateConfig
    chat_template = ChatTemplateConfig(
        model_name, meta_instruction=meta_instruction).chat_template
    prompt = chat_template.get_prompt('')
    if model_name == 'base':
        assert prompt == ''
    else:
        assert meta_instruction in prompt


def test_base_model():
...
...
@@ -21,8 +63,6 @@ def test_vicuna():
    model = MODELS.get('vicuna')(capability='completion')
    assert model.get_prompt(prompt, sequence_start=True) == prompt
    assert model.get_prompt(prompt, sequence_start=False) == prompt
    assert model.stop_words is None
    assert model.system is not None
    model = MODELS.get('vicuna')(capability='chat', system='Provide answers in Python')
...
...
@@ -34,7 +74,7 @@ def test_vicuna():
    _prompt = None
    with pytest.raises(AssertionError):
        _prompt = model.get_prompt(prompt, sequence_start=True)
    assert _prompt is None
    assert _prompt is None


def test_internlm_chat():
...
...
@@ -43,7 +83,7 @@ def test_internlm_chat():
    assert model.get_prompt(prompt, sequence_start=True) == prompt
    assert model.get_prompt(prompt, sequence_start=False) == prompt
    assert model.stop_words is not None
    assert model.system == ''
    assert model.system == '<|System|>:'
    assert model.session_len == 2048
    model = MODELS.get('internlm-chat-7b')(capability='chat',
...
...
@@ -56,7 +96,7 @@ def test_internlm_chat():
    _prompt = None
    with pytest.raises(AssertionError):
        _prompt = model.get_prompt(prompt, sequence_start=True)
    assert _prompt is None
    assert _prompt is None

    model = MODELS.get('internlm-chat-7b-8k')()
    assert model.session_len == 8192
...
...
@@ -68,7 +108,6 @@ def test_baichuan():
    assert model.get_prompt(prompt, sequence_start=True) == prompt
    assert model.get_prompt(prompt, sequence_start=False) == prompt
    assert model.stop_words is None
    assert model.repetition_penalty == 1.1
    model = MODELS.get('baichuan-7b')(capability='chat')
    _prompt = model.get_prompt(prompt, sequence_start=True)
...
...
@@ -81,19 +120,19 @@ def test_llama2():
    assert model.get_prompt(prompt, sequence_start=True) == prompt
    assert model.get_prompt(prompt, sequence_start=False) == prompt
    assert model.stop_words is None
    assert model.default_sys_prompt is not None
    assert model.meta_instruction is not None

    model = MODELS.get('llama2')(capability='chat', system='Provide answers in Python')
                                 meta_instruction='Provide answers in Python')
    assert model.get_prompt(prompt, sequence_start=True) != prompt
    assert model.get_prompt(prompt, sequence_start=False) != prompt
    assert model.default_sys_prompt == 'Provide answers in Python'
    assert model.meta_instruction == 'Provide answers in Python'

    model = MODELS.get('llama2')(capability='voice')
    _prompt = None
    with pytest.raises(AssertionError):
        _prompt = model.get_prompt(prompt, sequence_start=True)
    assert _prompt is None
    assert _prompt is None


def test_qwen():
...
...
@@ -111,7 +150,7 @@ def test_qwen():
    _prompt = None
    with pytest.raises(AssertionError):
        _prompt = model.get_prompt(prompt, sequence_start=True)
    assert _prompt is None
    assert _prompt is None


def test_codellama_completion():
...
...
@@ -167,39 +206,3 @@ def test_codellama_others():
    with pytest.raises(AssertionError):
        model = MODELS.get('codellama')(capability='java')
    assert model is None


def test_sampling_param():
    model = MODELS.get('llama')()
    default_sampling_param = SamplingParam()
    assert model.sampling_param == default_sampling_param

    model = MODELS.get('llama')(top_p=0.1, top_k=10)
    assert model.sampling_param.top_p == 0.1 and \
        model.sampling_param.top_k == 10
    assert model.sampling_param.temperature == 0.8 and \
        model.sampling_param.repetition_penalty == 1.0

    model = MODELS.get('codellama')(capability='completion')
    assert model.sampling_param.top_p == 0.9 and \
        model.sampling_param.top_k is None and \
        model.sampling_param.temperature == 0.2 and \
        model.sampling_param.repetition_penalty == 1.0

    model = MODELS.get('codellama')(capability='chat')
    assert model.sampling_param.top_p == 0.95 and \
        model.sampling_param.top_k is None and \
        model.sampling_param.temperature == 0.2 and \
        model.sampling_param.repetition_penalty == 1.0

    model = MODELS.get('codellama')(capability='infilling')
    assert model.sampling_param.top_p == 0.9 and \
        model.sampling_param.top_k is None and \
        model.sampling_param.temperature == 0.0 and \
        model.sampling_param.repetition_penalty == 1.0

    model = MODELS.get('codellama')(capability='python')
    assert model.sampling_param.top_p == 0.9 and \
        model.sampling_param.top_k is None and \
        model.sampling_param.temperature == 0.2 and \
        model.sampling_param.repetition_penalty == 1.0
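The new test_best_match_model cases document how lmdeploy.model.best_match_model (imported at the top of this file) deduces a chat-template name from a model path. A quick interactive check; the commented outputs follow the test's parametrized expectations and may vary across lmdeploy versions:

# Quick check of the deduction behavior exercised by the test above.
from lmdeploy.model import best_match_model

for path in ['internlm/internlm-chat-7b', 'Qwen/Qwen-7B-Chat-w4', 'workspace']:
    print(path, '->', best_match_model(path))
# expected per the test cases: 'internlm', 'qwen', None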
tests/test_lmdeploy/test_tokenizer.py
import random

import pytest

from lmdeploy.tokenizer import HuggingFaceTokenizer
from lmdeploy.tokenizer import DetokenizeState, HuggingFaceTokenizer


@pytest.mark.parametrize('model_path', [
    'internlm/internlm-chat-7b', 'Qwen/Qwen-7B-Chat',
    'baichuan-inc/Baichuan2-7B-Chat', 'upstage/SOLAR-0-70b-16bit',
    'baichuan-inc/Baichuan-7B', 'codellama/CodeLlama-7b-hf',
    'upstage/SOLAR-0-70b-16bit'
    'THUDM/chatglm2-6b', '01-ai/Yi-6B-200k', '01-ai/Yi-34B-Chat',
    '01-ai/Yi-6B-Chat', 'WizardLM/WizardLM-70B-V1.0',
    'codellama/CodeLlama-34b-Instruct-hf', 'tiiuae/falcon-7b'
])
@pytest.mark.parametrize('input', [
    'hi, this is a test 😆😆! ' * 5, '為什麼我還在用繁體字 😆😆 gg! ' * 5,
    ' License at\n#\n#' + ' ' * 100 + 'ht', ' '
])
@pytest.mark.parametrize('input', [
    'hi, this is a test 😆😆! ' * 5, '為什麼我還在用繁體字 😆😆 gg! ' * 5
])
def test_tokenizer(model_path, input):
@pytest.mark.parametrize('interval', [1, 3])
@pytest.mark.parametrize('skip_special_tokens', [True, False])
def test_tokenizer(model_path, input, interval, skip_special_tokens):
    tokenizer = HuggingFaceTokenizer(model_path)
    encoded = tokenizer.encode(input)
    encoded = tokenizer.encode(input, False, add_special_tokens=False)
    output = ''
    offset = 0
    for i in range(1, len(encoded) + 1):
        decoded = tokenizer.decode(encoded[:i], offset)
        if decoded.endswith('�'):
            continue
    state = DetokenizeState()
    for i in range(0, len(encoded), interval):
        offset = i + interval
        if offset < len(encoded):
            # lmdeploy may decode nothing when concurrency is high
            if random.randint(1, 10) < 4:
                offset -= interval
        decoded, state = tokenizer.detokenize_incrementally(
            encoded[:offset], state, skip_special_tokens)
        output += decoded
        offset = i
    assert input == output, 'input string should equal to output after enc-dec'
@pytest.mark.parametrize('model_path', [
    'internlm/internlm-chat-7b', 'Qwen/Qwen-7B-Chat',
    'baichuan-inc/Baichuan2-7B-Chat', 'codellama/CodeLlama-7b-hf',
    'upstage/SOLAR-0-70b-16bit'
])
@pytest.mark.parametrize('stop_words', ['.', ' ', '?', ''])
def test_tokenizer_with_stop_words(model_path, stop_words):
    tokenizer = HuggingFaceTokenizer(model_path)
    indexes = tokenizer.indexes_containing_token(stop_words)
    assert indexes is not None


def test_qwen_vl_decode_special():
    from lmdeploy.tokenizer import Tokenizer
    tok = Tokenizer('Qwen/Qwen-VL-Chat')
    try:
        tok.decode([151857])
        assert (0)
    except Exception as e:
        assert str(e) == 'Unclosed image token'
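The switch from repeatedly calling decode (and skipping outputs that end in the replacement character '�') to detokenize_incrementally with a DetokenizeState reflects that a multi-byte UTF-8 character, such as the emoji in these inputs, can be split across several tokens; decoding a prefix mid-character yields '�'. A small standalone illustration of that effect using plain byte slicing, with no lmdeploy involved:

# Standalone illustration: decoding a partial UTF-8 sequence produces the
# replacement character, which is why incremental detokenization keeps state.
text = '😆'                      # 4 bytes in UTF-8
data = text.encode('utf-8')
partial = data[:2].decode('utf-8', errors='replace')
complete = data.decode('utf-8', errors='replace')
print(repr(partial))             # '\ufffd', rendered as '�'
print(repr(complete))            # '😆'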