"docs/vscode:/vscode.git/clone" did not exist on "f00cd6efbd00b0273f58c393a617415b5d1d410e"
Unverified Commit f30abd09 authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Improve error message & Add vicuna template (#57)

parent 40ab1f01
...@@ -388,3 +388,15 @@ register_conv_template( ...@@ -388,3 +388,15 @@ register_conv_template(
stop_str=["<|endoftext|>", "<|im_end|>"], stop_str=["<|endoftext|>", "<|im_end|>"],
) )
) )
# Register the chat template named "vicuna_v1.1" (USER / ASSISTANT roles).
register_conv_template(
    Conversation(
        name="vicuna_v1.1",
        # The two adjacent literals are concatenated into one system prompt.
        system_message=(
            "A chat between a curious user and an artificial intelligence assistant. "
            "The assistant gives helpful, detailed, and polite answers to the user's questions."
        ),
        roles=("USER", "ASSISTANT"),
        # NOTE(review): how `sep` and `sep2` are interleaved is determined by
        # the ADD_COLON_TWO separator style, defined elsewhere — confirm there.
        sep_style=SeparatorStyle.ADD_COLON_TWO,
        sep=" ",
        sep2="</s>",
    )
)
...@@ -297,6 +297,11 @@ class ModelRunner: ...@@ -297,6 +297,11 @@ class ModelRunner:
def init_memory_pool(self, total_gpu_memory): def init_memory_pool(self, total_gpu_memory):
self.max_total_num_token = self.profile_max_num_token(total_gpu_memory) self.max_total_num_token = self.profile_max_num_token(total_gpu_memory)
if self.max_total_num_token <= 0:
raise RuntimeError("Not enough memory. "
"Please try to increase --mem-fraction-static.")
self.req_to_token_pool = ReqToTokenPool( self.req_to_token_pool = ReqToTokenPool(
int(self.max_total_num_token / self.model_config.context_len * 256), int(self.max_total_num_token / self.model_config.context_len * 256),
self.model_config.context_len + 8, self.model_config.context_len + 8,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment