Commit 2cec992f authored by zhouxiang's avatar zhouxiang
Browse files

修复batch推理bug (Fix batch-inference bug: pass the array length `query_size` to `freeCharArray` when releasing the batched output buffers)

parent 15d855b9
......@@ -483,7 +483,7 @@ class model:
response = ctypes.string_at(outputs[i]).decode()
responses.append(response)
historys[i] = historys[i] + [(querys[i], response)]
fastllm_lib.freeCharArray(outputs)
fastllm_lib.freeCharArray(outputs, query_size)
return responses, historys
def chat_batch(self, tokenizer, querys: List[str], historys: List[List[Tuple[str, str]]] = None, max_length: int = 1024,
......@@ -511,7 +511,7 @@ class model:
response = ctypes.string_at(outputs[i]).decode()
responses.append(response)
historys[i] = historys[i] + [(querys[i], response)]
fastllm_lib.freeCharArray(outputs)
fastllm_lib.freeCharArray(outputs, query_size)
return responses, historys
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment