Unverified commit 616b59f3 authored by rainred, committed by GitHub

[Feature] modify Runtime to support skip_tokenizer_init (#1088)


Co-authored-by: lzhang <zhanglei@modelbest.cn>
parent c8423ca3
@@ -533,6 +533,13 @@ class Runtime:
         prompt: str,
         sampling_params: Optional[Dict] = None,
     ):
+        if self.server_args.skip_tokenizer_init:
+            json_data = {
+                "input_ids": prompt,
+                "sampling_params": sampling_params,
+                "stream": True,
+            }
+        else:
             json_data = {
                 "text": prompt,
                 "sampling_params": sampling_params,
@@ -549,10 +556,13 @@ class Runtime:
             if chunk == "data: [DONE]\n\n":
                 break
             data = json.loads(chunk[5:].strip("\n"))
+            if hasattr(data, "text"):
                 cur = data["text"][pos:]
                 if cur:
                     yield cur
                 pos += len(cur)
+            else:
+                yield data

     add_request = async_generate
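In short, when the server is launched with skip_tokenizer_init, async_generate treats its prompt argument as pre-tokenized input ids and posts them under "input_ids" rather than "text", and the streaming loop yields each decoded JSON payload as-is instead of an incremental text delta. The following is a minimal usage sketch, not part of the commit; it assumes sgl.Runtime forwards skip_tokenizer_init to its server arguments, that add_request (aliased to async_generate in the diff above) is consumed as an async generator, and that tokenization happens client-side; the model path and prompt are placeholders.

# Sketch only, not part of the commit: illustrates the calling pattern this change enables.
# Assumptions: sgl.Runtime forwards skip_tokenizer_init to ServerArgs, add_request
# (aliased to async_generate in the diff above) is consumed as an async generator,
# and runtime.shutdown() is used for cleanup; the model path is a placeholder.
import asyncio

import sglang as sgl
from transformers import AutoTokenizer  # client-side tokenizer, since the server skips its own

async def main():
    runtime = sgl.Runtime(
        model_path="meta-llama/Llama-2-7b-chat-hf",  # placeholder
        skip_tokenizer_init=True,
    )
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

    # With skip_tokenizer_init, the prompt argument carries token ids, which the
    # Runtime sends as "input_ids" instead of "text" (first hunk above).
    input_ids = tokenizer.encode("The capital of France is")

    async for chunk in runtime.add_request(input_ids, {"max_new_tokens": 16}):
        # Without a server-side tokenizer there is no incremental "text" field,
        # so the second hunk yields each decoded JSON payload as-is.
        print(chunk)

    runtime.shutdown()

asyncio.run(main())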