"vscode:/vscode.git/clone" did not exist on "5d386d5b77aad51a976af27cb9a3a1f1a3ed5f7f"
Unverified Commit d4d1330a authored by Fengzhe Zhou, committed by GitHub

[Sync] Fix cmnli, fix vicuna meta template, fix longbench postprocess and other minor fixes (#625)

parent 5329724b
 from datasets import Dataset, load_dataset
-from opencompass.registry import LOAD_DATASET
+from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
 from ..base import BaseDataset
@@ -24,3 +24,9 @@ class LongBenchtriviaqaDataset(BaseDataset):
             })
         dataset[split] = Dataset.from_list(raw_data)
         return dataset
+
+
+@TEXT_POSTPROCESSORS.register_module()
+def triviaqa_postprocess(text: str) -> str:
+    text = text.lstrip('\n').split('\n')[0]
+    return text
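As a quick illustration of the postprocessor added above: it strips leading newlines and keeps only the first line of a completion. The snippet below is a standalone sketch (registry decorator omitted) and the sample completion string is invented for demonstration.

def triviaqa_postprocess(text: str) -> str:
    # Drop leading newlines, then keep only the first line of the answer.
    return text.lstrip('\n').split('\n')[0]


# Hypothetical raw completion from a long-context model.
raw = '\nParis\nThe question asks for the capital of France, so ...'
print(triviaqa_postprocess(raw))  # -> 'Paris'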
@@ -46,6 +46,9 @@ class HuggingFace(BaseModel):
         mode (str, optional): The method of input truncation when input length
             exceeds max_seq_len. 'mid' represents the part of input to
             truncate. Defaults to 'none'.
+        use_fastchat_template (bool, optional): Whether to use fastchat to get
+            the conversation template. If True, fastchat must be installed
+            first. Defaults to False.

     Note:
         About ``extract_pred_after_decode``: Commonly, we should extract the
@@ -68,7 +71,8 @@ class HuggingFace(BaseModel):
                  extract_pred_after_decode: bool = False,
                  batch_padding: bool = False,
                  pad_token_id: Optional[int] = None,
-                 mode: str = 'none'):
+                 mode: str = 'none',
+                 use_fastchat_template: bool = False):
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          tokenizer_only=tokenizer_only,
@@ -91,6 +95,7 @@ class HuggingFace(BaseModel):
                              model_kwargs=model_kwargs,
                              peft_path=peft_path)
         self.generation_kwargs = generation_kwargs
+        self.use_fastchat_template = use_fastchat_template

     def _load_tokenizer(self, path: str, tokenizer_path: Optional[str],
                         tokenizer_kwargs: dict):
@@ -220,6 +225,20 @@ class HuggingFace(BaseModel):
         if self.extract_pred_after_decode:
             prompt_lens = [len(input_) for input_ in inputs]

+        if self.use_fastchat_template:
+            try:
+                from fastchat.model import get_conversation_template
+            except ModuleNotFoundError:
+                raise ModuleNotFoundError(
+                    'fastchat is not installed. You can install it '
+                    'with \'pip install "fschat[model_worker,webui]"\' '
+                    'and retry.')
+            for i in range(len(inputs)):
+                conv = get_conversation_template('vicuna')
+                conv.append_message(conv.roles[0], inputs[i])
+                conv.append_message(conv.roles[1], None)
+                inputs[i] = conv.get_prompt()
+
         # step-1: tokenize the input with batch_encode_plus
         tokens = self.tokenizer.batch_encode_plus(inputs,
                                                   padding=True,
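For context on what the fastchat branch above does to each prompt, here is a minimal standalone sketch, assuming fschat is installed via pip install "fschat[model_worker,webui]"; the user message is illustrative only.

from fastchat.model import get_conversation_template

# Wrap a plain prompt in the vicuna conversation template, as the hunk above
# does for every element of `inputs`.
conv = get_conversation_template('vicuna')
conv.append_message(conv.roles[0], 'What is the capital of France?')
conv.append_message(conv.roles[1], None)
print(conv.get_prompt())
# Roughly: '<vicuna system prompt> USER: What is the capital of France? ASSISTANT:'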
@@ -263,6 +282,19 @@ class HuggingFace(BaseModel):
         if self.extract_pred_after_decode:
             prompt_lens = [len(input_) for input_ in inputs]

+        if self.use_fastchat_template:
+            try:
+                from fastchat.model import get_conversation_template
+            except ModuleNotFoundError:
+                raise ModuleNotFoundError(
+                    'fastchat is not installed. You can install it '
+                    'with \'pip install "fschat[model_worker,webui]"\' '
+                    'and retry.')
+            conv = get_conversation_template('vicuna')
+            conv.append_message(conv.roles[0], inputs[0])
+            conv.append_message(conv.roles[1], None)
+            inputs = [conv.get_prompt()]
+
         if self.mode == 'mid':
             input_ids = self.tokenizer(inputs, truncation=False)['input_ids']
             input_ids = torch.tensor(input_ids, device=self.model.device)
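To enable the new flag end to end, a model config would pass use_fastchat_template through to the wrapper. The sketch below is an assumption about typical usage, not a config shipped with this commit; the abbreviation, model path, and other field values are placeholders.

from opencompass.models import HuggingFaceCausalLM

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='vicuna-7b-v1.5-hf',               # placeholder abbreviation
        path='lmsys/vicuna-7b-v1.5',            # placeholder model path
        tokenizer_path='lmsys/vicuna-7b-v1.5',
        max_seq_len=2048,
        max_out_len=100,
        batch_size=8,
        use_fastchat_template=True,             # wrap prompts with the vicuna template
        run_cfg=dict(num_gpus=1),
    )
]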
@@ -491,7 +523,8 @@ class HuggingFaceChatGLM3(HuggingFace):
     def generate(self,
                  inputs: List[str or PromptList],
                  max_out_len: int = 512,
-                 temperature: float = 0.6) -> str:
+                 temperature: float = 0.6,
+                 skip_overlength: bool = False) -> str:
         """Generate response from input prompt.

         Args:
@@ -518,6 +551,20 @@ class HuggingFaceChatGLM3(HuggingFace):
                     history.append(msg)
             user_content = history[-1]['content']
             history = history[:-1]
+
+            if skip_overlength:
+                # The model raises an error when the prompt exceeds its
+                # maximum context length, e.g.:
+                # "Input length of input_ids is {INPUT_IDS},
+                # but `max_length` is set to 8192.
+                # This can lead to unexpected behavior.
+                # You should consider increasing `max_new_tokens`."
+                # Skip such prompts and return an empty response instead.
+                len_user_content = len(self.tokenizer.encode(user_content))
+                if len_user_content > 8192:
+                    responses.append('')
+                    continue
+
             try:
                 response, history = self.model.chat(self.tokenizer,
                                                     user_content,
......
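The skip_overlength guard above boils down to measuring the prompt in tokens before calling model.chat. A minimal standalone sketch of the same check follows; the 8192 limit mirrors the hardcoded value in the hunk, and the tokenizer argument is assumed to expose an encode() method as HuggingFace tokenizers do.

MAX_CONTEXT_LEN = 8192  # context limit assumed by the hunk above


def should_skip(tokenizer, user_content: str, max_len: int = MAX_CONTEXT_LEN) -> bool:
    # Skip prompts whose tokenized length exceeds the model's context window,
    # so the caller can append an empty response instead of letting
    # model.chat() raise.
    return len(tokenizer.encode(user_content)) > max_len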
@@ -141,12 +141,19 @@ class Llama2Chat(BaseModel):
                     path: str,
                     max_seq_len: int,
                     max_batch_size: int,
-                    tokenizer_path: Optional[str] = None):
+                    tokenizer_path: Optional[str] = None,
+                    force_bf16: bool = False):
         from llama import Llama
         self.generator = Llama.build(path, tokenizer_path, max_seq_len,
                                      max_batch_size)
         self.tokenizer = self.generator.tokenizer
         self.model = self.generator.model
+        if force_bf16:
+            # Force the model weights to `bfloat16` to work around
+            # 'RuntimeError: probability tensor contains either `inf`,
+            # `nan` or element < 0', which was observed during
+            # llama2-7b inference.
+            self.model = self.model.bfloat16()

     def _load_tokenizer(self, tokenizer_path: str):
         from llama import Tokenizer
......
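The force_bf16 workaround is simply a dtype cast applied after the model is built; .bfloat16() is available on any torch.nn.Module. A small self-contained sketch, where the Linear layer stands in for the real Llama model:

import torch

# Casting weights to bfloat16 sidesteps the fp16 sampling failure
# ('probability tensor contains either `inf`, `nan` or element < 0')
# reported for llama2-7b inference.
model = torch.nn.Linear(4, 4)          # stand-in for the Llama model
model = model.bfloat16()
print(next(model.parameters()).dtype)  # torch.bfloat16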
@@ -108,7 +108,11 @@ class GenInferencer(BaseInferencer):
                                      'tmp_' + output_json_filename)
         if osp.exists(tmp_json_filepath):
             # TODO: move resume to output handler
-            tmp_result_dict = mmengine.load(tmp_json_filepath)
-            output_handler.results_dict = tmp_result_dict
-            index = len(tmp_result_dict)
+            try:
+                tmp_result_dict = mmengine.load(tmp_json_filepath)
+            except Exception:
+                pass
+            else:
+                output_handler.results_dict = tmp_result_dict
+                index = len(tmp_result_dict)
......
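The GenInferencer change makes resuming tolerant of a truncated or corrupted temporary results file: if loading fails, inference restarts from index 0 instead of crashing. Below is a self-contained sketch of the same try/except/else pattern, using plain json in place of mmengine.load and a hypothetical file name.

import json
from typing import Tuple


def load_partial_results(tmp_path: str = 'tmp_results.json') -> Tuple[dict, int]:
    # Try to resume from a previous run; fall back to a fresh start if the
    # temporary file is missing or unreadable.
    try:
        with open(tmp_path) as f:
            results = json.load(f)
    except Exception:
        return {}, 0
    else:
        return results, len(results)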
@@ -96,7 +96,7 @@ class SlurmSequentialRunner(BaseRunner):
         try:
             parent_conns = []
-            num_workers = min(self.max_num_workers, len(tasks))
+            num_workers = max(min(self.max_num_workers, len(tasks)), 1)
             with Pool(processes=num_workers) as pool:
                 for task in tasks:
                     parent_conn, child_conn = Pipe()
......
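The SlurmSequentialRunner fix guards against an empty task list: multiprocessing.Pool requires processes >= 1, so clamping with max(..., 1) avoids the ValueError that Pool(processes=0) would raise. A quick standalone demonstration, with an arbitrary worker cap of 32:

from multiprocessing import Pool


def main():
    tasks = []  # an empty task list previously yielded num_workers == 0
    max_num_workers = 32
    num_workers = max(min(max_num_workers, len(tasks)), 1)
    with Pool(processes=num_workers) as pool:  # Pool(processes=0) raises ValueError
        results = pool.map(str, tasks)         # no-op for an empty task list
    print(results)                             # []


if __name__ == '__main__':
    main()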