Unverified Commit 558029b6 authored by AllentDan's avatar AllentDan Committed by GitHub
Browse files

add encode for opencompass (#828)

* add encode for opencompass

* doc

* remove **kwargs
parent 872701e3
......@@ -12,7 +12,7 @@ The user can open the http url print by the following command in a browser.
lmdeploy serve api_server ./workspace --server_name 0.0.0.0 --server_port ${server_port} --instance_num 64 --tp 1
```
We provide four restful api in total. Three of them are in OpenAI format.
We provide some RESTful APIs. Three of them are in OpenAI format.
- /v1/chat/completions
- /v1/models
......
......@@ -12,7 +12,7 @@
lmdeploy serve api_server ./workspace 0.0.0.0 --server_port ${server_port} --instance_num 64 --tp 1
```
我们一共提供四个 restful api,其中三个仿照 OpenAI 的形式。
我们提供了一些 RESTful API,其中三个仿照 OpenAI 的形式。
- /v1/chat/completions
- /v1/models
......
......@@ -28,6 +28,7 @@ class APIClient:
self.chat_completions_v1_url = f'{api_server_url}/v1/chat/completions'
self.completions_v1_url = f'{api_server_url}/v1/completions'
self.models_v1_url = f'{api_server_url}/v1/models'
self.encode_v1_url = f'{api_server_url}/v1/encode'
self._available_models = None
@property
......@@ -43,6 +44,31 @@ class APIClient:
return self._available_models
return None
def encode(self,
           input: Union[str, List[str]],
           do_preprocess: Optional[bool] = False,
           add_bos: Optional[bool] = True):
    """Tokenize prompts via the server's `/v1/encode` endpoint.

    Args:
        input: a single prompt or a batch of prompts to encode.
        do_preprocess: apply the model's chat-template preprocessing
            before tokenizing. Default to False.
        add_bos: True when it is the beginning of a conversation. False
            when it is not. Default to True.

    Return:
        (input_ids, length) on success, or (None, None) when the
        response carries no text body.
    """
    payload = {
        'input': input,
        'do_preprocess': do_preprocess,
        'add_bos': add_bos,
    }
    response = requests.post(self.encode_v1_url,
                             headers={'content-type': 'application/json'},
                             json=payload,
                             stream=False)
    # Guard against a response object with no text payload.
    if not hasattr(response, 'text'):
        return None, None
    result = json.loads(response.text)
    return result['input_ids'], result['length']
def chat_completions_v1(self,
model: str,
messages: Union[str, List[Dict[str, str]]],
......
......@@ -18,8 +18,9 @@ from lmdeploy.serve.openai.protocol import ( # noqa: E501
ChatCompletionStreamResponse, ChatMessage, CompletionRequest,
CompletionResponse, CompletionResponseChoice,
CompletionResponseStreamChoice, CompletionStreamResponse, DeltaMessage,
EmbeddingsRequest, ErrorResponse, GenerateRequest, GenerateResponse,
ModelCard, ModelList, ModelPermission, UsageInfo)
EmbeddingsRequest, EncodeRequest, EncodeResponse, ErrorResponse,
GenerateRequest, GenerateResponse, ModelCard, ModelList, ModelPermission,
UsageInfo)
class VariableInterface:
......@@ -393,6 +394,37 @@ async def create_embeddings(request: EmbeddingsRequest,
'Unsupported by turbomind.')
@app.post('/v1/encode')
async def encode(request: EncodeRequest, raw_request: Request = None):
    """Encode prompts into token ids.

    The request should be a JSON object with the following fields:
    - input: the prompt to be encoded. In str or List[str] format.
    - do_preprocess: whether do preprocess or not. Default to False.
    - add_bos: True when it is the beginning of a conversation. False when it
      is not. Default to True.
    """

    # Named `_encode` (not `encode`) so it does not shadow this endpoint
    # function, which would make an accidental recursive call easy to write.
    def _encode(prompt: str, do_preprocess: bool, add_bos: bool):
        """Tokenize one prompt, optionally applying the chat template."""
        if do_preprocess:
            # Wrap the raw prompt with the model's conversation template
            # before tokenizing.
            prompt = VariableInterface.async_engine.model.get_prompt(
                prompt, sequence_start=add_bos)
        return VariableInterface.async_engine.tokenizer.encode(
            prompt, add_bos=add_bos)

    if isinstance(request.input, str):
        encoded = _encode(request.input, request.do_preprocess,
                          request.add_bos)
        return EncodeResponse(input_ids=encoded, length=len(encoded))
    # Batch case: encode each prompt independently and report per-prompt
    # lengths.
    encoded = [
        _encode(prompt, request.do_preprocess, request.add_bos)
        for prompt in request.input
    ]
    return EncodeResponse(input_ids=encoded,
                          length=[len(ids) for ids in encoded])
@app.post('/generate',
tags=['deprecated'],
description='please use /v1/chat/interactive')
......
......@@ -191,6 +191,19 @@ class EmbeddingsResponse(BaseModel):
usage: UsageInfo
class EncodeRequest(BaseModel):
    """Encode request.

    Schema of the JSON body accepted by the `/v1/encode` endpoint.
    """
    # A single prompt or a batch of prompts to tokenize. The field name
    # `input` shadows the builtin, but it is part of the wire API and
    # cannot be renamed without breaking clients.
    input: Union[str, List[str]]
    # Whether to apply the model's chat-template preprocessing first.
    do_preprocess: Optional[bool] = False
    # True at the beginning of a conversation (prepend BOS), False otherwise.
    add_bos: Optional[bool] = True
class EncodeResponse(BaseModel):
    """Encode response.

    Mirrors the request shape: a single prompt yields one id list and an
    int length; a batch yields a list of id lists and a list of lengths.
    """
    # Token ids for one prompt, or one list per prompt for a batch.
    input_ids: Union[List[int], List[List[int]]]
    # Number of tokens per prompt, scalar or per-prompt list accordingly.
    length: Union[int, List[int]]
class GenerateRequest(BaseModel):
"""Generate request."""
prompt: Union[str, List[Dict[str, str]]]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment