Commit 558029b6 (unverified)
Authored Dec 13, 2023 by AllentDan; committed by GitHub on Dec 13, 2023
Parent: 872701e3

add encode for opencompass (#828)

* add encode for opencompass
* doc
* remove **kwargs
Showing 5 changed files with 75 additions and 4 deletions:

- docs/en/restful_api.md (+1, -1)
- docs/zh_cn/restful_api.md (+1, -1)
- lmdeploy/serve/openai/api_client.py (+26, -0)
- lmdeploy/serve/openai/api_server.py (+34, -2)
- lmdeploy/serve/openai/protocol.py (+13, -0)
docs/en/restful_api.md

````diff
@@ -12,7 +12,7 @@ The user can open the http url print by the following command in a browser.
 lmdeploy serve api_server ./workspace --server_name 0.0.0.0 --server_port ${server_port} --instance_num 64 --tp 1
 ```

-We provide four restful api in total. Three of them are in OpenAI format.
+We provide some RESTful APIs. Three of them are in OpenAI format.

 - /v1/chat/completions
 - /v1/models
````
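As a quick sanity check of a freshly launched server, one of the endpoints listed above can be queried directly. This is a minimal sketch, not part of the commit: the host and port are placeholders, and it assumes `/v1/models` accepts GET as in the OpenAI API convention.

```python
# Sketch (not from the diff): confirm the api_server is up by listing models.
import requests

server_url = 'http://0.0.0.0:23333'   # placeholder address of `lmdeploy serve api_server`
resp = requests.get(f'{server_url}/v1/models')
resp.raise_for_status()
print(resp.json())                    # OpenAI-style model list
```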
docs/zh_cn/restful_api.md

````diff
@@ -12,7 +12,7 @@
 lmdeploy serve api_server ./workspace --server_name 0.0.0.0 --server_port ${server_port} --instance_num 64 --tp 1
 ```

-We provide four RESTful APIs in total, three of which follow the OpenAI format.
+Among the RESTful APIs we provide, three follow the OpenAI format.

 - /v1/chat/completions
 - /v1/models
````
lmdeploy/serve/openai/api_client.py

```diff
@@ -28,6 +28,7 @@ class APIClient:
         self.chat_completions_v1_url = f'{api_server_url}/v1/chat/completions'
         self.completions_v1_url = f'{api_server_url}/v1/completions'
         self.models_v1_url = f'{api_server_url}/v1/models'
+        self.encode_v1_url = f'{api_server_url}/v1/encode'
         self._available_models = None

     @property
@@ -43,6 +44,31 @@ class APIClient:
             return self._available_models
         return None

+    def encode(self,
+               input: Union[str, List[str]],
+               do_preprocess: Optional[bool] = False,
+               add_bos: Optional[bool] = True):
+        """Encode prompts.
+
+        Args:
+            input: the prompt to be encoded. In str or List[str] format.
+            do_preprocess: whether do preprocess or not. Default to False.
+            add_bos: True when it is the beginning of a conversation. False
+                when it is not. Default to True.
+        Return: (input_ids, length)
+        """
+        headers = {'content-type': 'application/json'}
+        response = requests.post(self.encode_v1_url,
+                                 headers=headers,
+                                 json=dict(input=input,
+                                           do_preprocess=do_preprocess,
+                                           add_bos=add_bos),
+                                 stream=False)
+        if hasattr(response, 'text'):
+            output = json.loads(response.text)
+            return output['input_ids'], output['length']
+        return None, None
+
     def chat_completions_v1(self,
                             model: str,
                             messages: Union[str, List[Dict[str, str]]],
```
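For illustration (not part of the diff), the new `encode` method is used like the other `APIClient` helpers; it returns the tuple `(input_ids, length)` shown in the docstring above. The server URL below is a placeholder.

```python
# Sketch: tokenize prompts through a running api_server via APIClient.encode.
# The URL is a placeholder, not taken from the commit.
from lmdeploy.serve.openai.api_client import APIClient

client = APIClient('http://0.0.0.0:23333')

# Single prompt -> one token id list and its length.
input_ids, length = client.encode('Hello, world!')
print(length, input_ids)

# List of prompts -> per-prompt id lists and lengths.
ids_batch, lengths = client.encode(['Hello', 'How are you?'],
                                   do_preprocess=False,
                                   add_bos=True)
print(lengths)
```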
lmdeploy/serve/openai/api_server.py

```diff
@@ -18,8 +18,9 @@ from lmdeploy.serve.openai.protocol import (  # noqa: E501
     ChatCompletionStreamResponse, ChatMessage, CompletionRequest,
     CompletionResponse, CompletionResponseChoice,
     CompletionResponseStreamChoice, CompletionStreamResponse, DeltaMessage,
-    EmbeddingsRequest, ErrorResponse, GenerateRequest, GenerateResponse,
-    ModelCard, ModelList, ModelPermission, UsageInfo)
+    EmbeddingsRequest, EncodeRequest, EncodeResponse, ErrorResponse,
+    GenerateRequest, GenerateResponse, ModelCard, ModelList, ModelPermission,
+    UsageInfo)


 class VariableInterface:
@@ -393,6 +394,37 @@ async def create_embeddings(request: EmbeddingsRequest,
                             'Unsupported by turbomind.')


+@app.post('/v1/encode')
+async def encode(request: EncodeRequest, raw_request: Request = None):
+    """Encode prompts.
+
+    The request should be a JSON object with the following fields:
+    - input: the prompt to be encoded. In str or List[str] format.
+    - do_preprocess: whether do preprocess or not. Default to False.
+    - add_bos: True when it is the beginning of a conversation. False when it
+        is not. Default to True.
+    """
+
+    def encode(prompt: str, do_preprocess: bool, add_bos: bool):
+        if do_preprocess:
+            prompt = VariableInterface.async_engine.model.get_prompt(
+                prompt, sequence_start=add_bos)
+        input_ids = VariableInterface.async_engine.tokenizer.encode(
+            prompt, add_bos=add_bos)
+        return input_ids
+
+    if isinstance(request.input, str):
+        encoded = encode(request.input, request.do_preprocess,
+                         request.add_bos)
+        return EncodeResponse(input_ids=encoded, length=len(encoded))
+    else:
+        encoded, length = [], []
+        for prompt in request.input:
+            ids = encode(prompt, request.do_preprocess, request.add_bos)
+            encoded.append(ids)
+            length.append(len(ids))
+        return EncodeResponse(input_ids=encoded, length=length)
+
+
 @app.post('/generate',
           tags=['deprecated'],
           description='please use /v1/chat/interactive')
```
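The endpoint can also be exercised without the client wrapper. A hedged sketch using plain `requests`, with the request body mirroring the `EncodeRequest` fields and the response mirroring `EncodeResponse`; the host and port are placeholders.

```python
# Sketch: POST to /v1/encode directly. Field names follow EncodeRequest;
# the host/port are placeholders for wherever api_server is running.
import requests

resp = requests.post('http://0.0.0.0:23333/v1/encode',
                     headers={'content-type': 'application/json'},
                     json={'input': ['Hello', 'How are you?'],
                           'do_preprocess': False,
                           'add_bos': True})
output = resp.json()
print(output['input_ids'])   # List[List[int]] for a batched request
print(output['length'])      # List[int], one token count per prompt
```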
lmdeploy/serve/openai/protocol.py

```diff
@@ -191,6 +191,19 @@ class EmbeddingsResponse(BaseModel):
     usage: UsageInfo


+class EncodeRequest(BaseModel):
+    """Encode request."""
+    input: Union[str, List[str]]
+    do_preprocess: Optional[bool] = False
+    add_bos: Optional[bool] = True
+
+
+class EncodeResponse(BaseModel):
+    """Encode response."""
+    input_ids: Union[List[int], List[List[int]]]
+    length: Union[int, List[int]]
+
+
 class GenerateRequest(BaseModel):
     """Generate request."""
     prompt: Union[str, List[Dict[str, str]]]
```
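To see the new models in isolation, here is a small sketch assuming only pydantic, which protocol.py already depends on; the field names and defaults come from the diff above.

```python
# Sketch: the new pydantic models on their own, outside the server.
from lmdeploy.serve.openai.protocol import EncodeRequest, EncodeResponse

req = EncodeRequest(input=['Hello', 'world'])
print(req.do_preprocess, req.add_bos)           # False True (the declared defaults)

resp = EncodeResponse(input_ids=[[1, 2, 3], [4, 5]], length=[3, 2])
print(resp.dict())                              # plain-dict form of the response body
```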