OpenDAS / opencompass, commit 336d8d76 (unverified)
Authored Dec 24, 2023 by AllentDan; committed by GitHub on Dec 24, 2023

add turbomind restful api support (#693)

* add turbomind restful api support
* config
* top_p 0.8
* top_k = 1

Parent: e985100c
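Note on the pinned sampling settings: with top_k = 1 only the single most probable token survives the sampling filter, so decoding through this wrapper is effectively greedy and deterministic; the accompanying top_p = 0.8 then has no further effect on the token choice.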
Showing 3 changed files with 197 additions and 0 deletions:

* configs/eval_internlm_chat_turbomind_api.py (+39, -0)
* configs/eval_internlm_turbomind_api.py (+28, -0)
* opencompass/models/turbomind_api.py (+130, -0)
configs/eval_internlm_chat_turbomind_api.py (new file, mode 100644)
from mmengine.config import read_base
from opencompass.models.turbomind_api import TurboMindAPIModel

with read_base():
    # choose a list of datasets
    from .datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from .datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
    from .datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from .datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from .datasets.race.race_gen_69ee4f import race_datasets
    from .datasets.crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
    # and output the results in a chosen format
    from .summarizers.medium import summarizer

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

meta_template = dict(
    round=[
        dict(role='HUMAN', begin='<|User|>:', end='\n'),
        dict(role='BOT', begin='<|Bot|>:', end='<eoa>\n', generate=True),
    ],
    eos_token_id=103028,
)

models = [
    dict(
        type=TurboMindAPIModel,
        abbr='internlm-chat-20b-turbomind',
        path="internlm-chat-20b",
        api_addr='http://0.0.0.0:23333',
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        meta_template=meta_template,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
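Both configs assume an lmdeploy RESTful server is already listening at api_addr. As a minimal sketch (not part of this commit; the prompt and session_id values are illustrative), the endpoint can be sanity-checked with the same APIClient calls the new wrapper relies on:

from lmdeploy.serve.openai.api_client import APIClient

client = APIClient('http://0.0.0.0:23333')  # same api_addr as in the config
print(client.available_models)              # the wrapper picks available_models[0]

# Stream one short completion, mirroring the wrapper's completions_v1 call.
text = ''
for output in client.completions_v1(session_id=1,  # illustrative; the wrapper uses the thread ident
                                    prompt='Hello',
                                    model=client.available_models[0],
                                    max_tokens=16,
                                    temperature=1.0,
                                    top_p=0.8,
                                    top_k=1):
    text += output['choices'][0]['text']
print(text)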
configs/eval_internlm_turbomind_api.py (new file, mode 100644)
from mmengine.config import read_base
from opencompass.models.turbomind_api import TurboMindAPIModel

with read_base():
    # choose a list of datasets
    from .datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from .datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
    from .datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from .datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    # and output the results in a chosen format
    from .summarizers.medium import summarizer

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

models = [
    dict(
        type=TurboMindAPIModel,
        abbr='internlm-chat-20b-turbomind',
        path="internlm-chat-20b",
        api_addr='http://0.0.0.0:23333',
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
opencompass/models/turbomind_api.py (new file, mode 100644)
import threading
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union

from opencompass.models.base import BaseModel, LMTemplateParser
from opencompass.utils.logging import get_logger
from opencompass.utils.prompt import PromptList

PromptType = Union[PromptList, str]


def valid_str(string, coding='utf-8'):
    """Decode text according to its encoding type."""
    invalid_chars = [b'\xef\xbf\xbd']  # UTF-8 replacement character
    bstr = bytes(string, coding)
    for invalid_char in invalid_chars:
        bstr = bstr.replace(invalid_char, b'')
    ret = bstr.decode(encoding=coding, errors='ignore')
    return ret


class TurboMindAPIModel(BaseModel):
    """Model wrapper for TurboMind's RESTful API served by lmdeploy.

    Args:
        path (str): The name of the deployed model.
        api_addr (str): The address (http://ip:port format) of lmdeploy's
            RESTful API server.
        max_seq_len (int): The maximum allowed sequence length of a model.
            Note that the length of prompt + generated tokens shall not exceed
            this value. Defaults to 2048.
        meta_template (Dict, optional): The model's meta prompt
            template if needed, in case any meta instructions have to be
            injected or wrapped.
    """

    is_api: bool = True

    def __init__(
        self,
        path: str,
        api_addr: str = 'http://0.0.0.0:23333',
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
    ):
        super().__init__(path=path,
                         max_seq_len=max_seq_len,
                         meta_template=meta_template)
        from lmdeploy.serve.openai.api_client import APIClient
        self.chatbot = APIClient(api_addr)
        self.model_name = self.chatbot.available_models[0]
        self.logger = get_logger()
        self.template_parser = LMTemplateParser(meta_template)
        self.eos_token_id = None
        if meta_template and 'eos_token_id' in meta_template:
            self.eos_token_id = meta_template['eos_token_id']
        self.api_addr = api_addr

    def generate(
        self,
        inputs: List[PromptType],
        max_out_len: int = 512,
        temperature: float = 1.0,
    ) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptDicts.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.
            temperature (float): What sampling temperature to use,
                between 0 and 2. Higher values like 0.8 will make the output
                more random, while lower values like 0.2 will make it more
                focused and deterministic. Defaults to 1.0.

        Returns:
            List[str]: A list of generated strings.
        """
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [max_out_len] * len(inputs),
                             [temperature] * len(inputs)))
        return results

    def get_token_len(self, prompt: str) -> int:
        """Get the token length of the prompt via the API's encode call."""
        input_ids, length = self.chatbot.encode(prompt)
        return length

    def wait(self):
        """Wait till the next query can be sent.

        Applicable in both single-thread and multi-thread environments.
        Note: expects a `token_bucket` rate limiter to have been attached
        to the instance; none is created in this class.
        """
        return self.token_bucket.get_token()

    def _generate(self, prompt: PromptType, max_out_len: int,
                  temperature: float) -> str:
        """Generate results given a single input.

        Args:
            prompt (PromptType): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.
            temperature (float): What sampling temperature to use,
                between 0 and 2. Higher values like 0.8 will make the output
                more random, while lower values like 0.2 will make it more
                focused and deterministic.

        Returns:
            str: The generated string.
        """
        assert type(prompt) is str, \
            'We only support string for the TurboMind RESTful API'
        response = ''
        # Stream the completion and concatenate the text chunks.
        for output in self.chatbot.completions_v1(
                session_id=threading.current_thread().ident,
                prompt=prompt,
                model=self.model_name,
                max_tokens=max_out_len,
                temperature=temperature,
                top_p=0.8,
                top_k=1):
            response += output['choices'][0]['text']
        response = valid_str(response)
        return response
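For reference, a hypothetical snippet driving the new wrapper directly (in practice OpenCompass instantiates it from the configs above; the prompt text here is illustrative):

from opencompass.models.turbomind_api import TurboMindAPIModel

model = TurboMindAPIModel(path='internlm-chat-20b',
                          api_addr='http://0.0.0.0:23333',
                          max_seq_len=2048)
outputs = model.generate(['Briefly introduce the TurboMind inference engine.'],
                         max_out_len=64,
                         temperature=1.0)
print(outputs[0])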