Unverified commit 558029b6 authored by AllentDan, committed by GitHub

add encode for opencompass (#828)

* add encode for opencompass

* doc

* remove **kwargs
parent 872701e3
@@ -12,7 +12,7 @@ The user can open the http url print by the following command in a browser.
 lmdeploy serve api_server ./workspace --server_name 0.0.0.0 --server_port ${server_port} --instance_num 64 --tp 1
 ```
-We provide four restful api in total. Three of them are in OpenAI format.
+We provide some RESTful APIs. Three of them are in OpenAI format.
 - /v1/chat/completions
 - /v1/models
...
@@ -12,7 +12,7 @@
 lmdeploy serve api_server ./workspace 0.0.0.0 --server_port ${server_port} --instance_num 64 --tp 1
 ```
-We provide four restful APIs in total; three of them follow the OpenAI format.
+Among the restful APIs we provide, three follow the OpenAI format.
 - /v1/chat/completions
 - /v1/models
...
@@ -28,6 +28,7 @@ class APIClient:
         self.chat_completions_v1_url = f'{api_server_url}/v1/chat/completions'
         self.completions_v1_url = f'{api_server_url}/v1/completions'
         self.models_v1_url = f'{api_server_url}/v1/models'
+        self.encode_v1_url = f'{api_server_url}/v1/encode'
         self._available_models = None

     @property
@@ -43,6 +44,31 @@ class APIClient:
             return self._available_models
         return None

+    def encode(self,
+               input: Union[str, List[str]],
+               do_preprocess: Optional[bool] = False,
+               add_bos: Optional[bool] = True):
+        """Encode prompts.
+
+        Args:
+            input: the prompt(s) to encode, a str or a List[str].
+            do_preprocess: whether to apply the chat template before
+                tokenizing. Defaults to False.
+            add_bos: True when the prompt begins a conversation, False
+                otherwise. Defaults to True.
+
+        Returns:
+            (input_ids, length)
+        """
+        headers = {'content-type': 'application/json'}
+        response = requests.post(self.encode_v1_url,
+                                 headers=headers,
+                                 json=dict(input=input,
+                                           do_preprocess=do_preprocess,
+                                           add_bos=add_bos),
+                                 stream=False)
+        if hasattr(response, 'text'):
+            output = json.loads(response.text)
+            return output['input_ids'], output['length']
+        return None, None
+
     def chat_completions_v1(self,
                             model: str,
                             messages: Union[str, List[Dict[str, str]]],
...
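For reviewers, a minimal usage sketch of the new client helper. The server address and the import path `lmdeploy.serve.openai.api_client` are assumptions inferred from this diff, not defaults set by the PR:

```python
# Hypothetical usage of APIClient.encode against a running api_server.
from lmdeploy.serve.openai.api_client import APIClient

client = APIClient('http://0.0.0.0:23333')  # placeholder host/port
input_ids, length = client.encode('Hello, world!',
                                  do_preprocess=False,
                                  add_bos=True)
print(input_ids)  # token ids for the prompt
print(length)     # number of tokens
```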
@@ -18,8 +18,9 @@ from lmdeploy.serve.openai.protocol import ( # noqa: E501
     ChatCompletionStreamResponse, ChatMessage, CompletionRequest,
     CompletionResponse, CompletionResponseChoice,
     CompletionResponseStreamChoice, CompletionStreamResponse, DeltaMessage,
-    EmbeddingsRequest, ErrorResponse, GenerateRequest, GenerateResponse,
-    ModelCard, ModelList, ModelPermission, UsageInfo)
+    EmbeddingsRequest, EncodeRequest, EncodeResponse, ErrorResponse,
+    GenerateRequest, GenerateResponse, ModelCard, ModelList, ModelPermission,
+    UsageInfo)


 class VariableInterface:
@@ -393,6 +394,37 @@ async def create_embeddings(request: EmbeddingsRequest,
                         'Unsupported by turbomind.')


+@app.post('/v1/encode')
+async def encode(request: EncodeRequest, raw_request: Request = None):
+    """Encode prompts.
+
+    The request should be a JSON object with the following fields:
+    - input: the prompt(s) to encode, a str or a List[str].
+    - do_preprocess: whether to apply the chat template before tokenizing.
+      Defaults to False.
+    - add_bos: True when the prompt begins a conversation, False otherwise.
+      Defaults to True.
+    """
+
+    def encode(prompt: str, do_preprocess: bool, add_bos: bool):
+        if do_preprocess:
+            prompt = VariableInterface.async_engine.model.get_prompt(
+                prompt, sequence_start=add_bos)
+        input_ids = VariableInterface.async_engine.tokenizer.encode(
+            prompt, add_bos=add_bos)
+        return input_ids
+
+    if isinstance(request.input, str):
+        encoded = encode(request.input, request.do_preprocess, request.add_bos)
+        return EncodeResponse(input_ids=encoded, length=len(encoded))
+    else:
+        encoded, length = [], []
+        for prompt in request.input:
+            ids = encode(prompt, request.do_preprocess, request.add_bos)
+            encoded.append(ids)
+            length.append(len(ids))
+        return EncodeResponse(input_ids=encoded, length=length)
+
+
 @app.post('/generate',
           tags=['deprecated'],
           description='please use /v1/chat/interactive')
...
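To make the endpoint's JSON contract concrete, a hedged sketch of calling /v1/encode over raw HTTP; the host/port are placeholders and the printed values are illustrative only:

```python
# Raw HTTP sketch of the /v1/encode contract added above. With a
# List[str] input, input_ids is List[List[int]] and length is
# List[int]; with a single str, both fields are unbatched.
import requests

resp = requests.post(
    'http://0.0.0.0:23333/v1/encode',  # placeholder server address
    json={
        'input': ['Hi, pal', 'How are you?'],
        'do_preprocess': False,
        'add_bos': True,
    })
data = resp.json()
print(data['input_ids'])  # two lists of token ids
print(data['length'])     # e.g. [4, 6] (illustrative counts)
```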
@@ -191,6 +191,19 @@ class EmbeddingsResponse(BaseModel):
     usage: UsageInfo


+class EncodeRequest(BaseModel):
+    """Encode request."""
+    input: Union[str, List[str]]
+    do_preprocess: Optional[bool] = False
+    add_bos: Optional[bool] = True
+
+
+class EncodeResponse(BaseModel):
+    """Encode response."""
+    input_ids: Union[List[int], List[List[int]]]
+    length: Union[int, List[int]]
+
+
 class GenerateRequest(BaseModel):
     """Generate request."""
     prompt: Union[str, List[Dict[str, str]]]
...
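A small sketch of how the two new pydantic models pair up for a batched call; the token ids below are fabricated for illustration:

```python
# Constructing the new protocol models directly.
from lmdeploy.serve.openai.protocol import EncodeRequest, EncodeResponse

req = EncodeRequest(input=['a', 'b'])  # batched; optional fields use defaults
resp = EncodeResponse(input_ids=[[1, 264], [1, 293]],  # fabricated ids
                      length=[2, 2])
assert len(resp.input_ids) == len(req.input)
```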