Unverified commit abe071b6, authored by Yuanchen, committed by GitHub
Browse files

fix ColossalEval (#4992)


Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com>
parent 459a88c8
...@@ -60,6 +60,11 @@ class DatasetEvaluator(object): ...@@ -60,6 +60,11 @@ class DatasetEvaluator(object):
sample["output"], ref, all_classes=self.data[category]["inference_kwargs"]["all_classes"] sample["output"], ref, all_classes=self.data[category]["inference_kwargs"]["all_classes"]
), ),
) )
score = max(
score,
metric_helper.accuracy_by_options(sample["input"], sample["output"], ref),
)
softmaxs.append(references[i] if score == 1 else -1) softmaxs.append(references[i] if score == 1 else -1)
else: else:
softmaxs.append(np.argmax(np.array(list(sample["softmax_over_choices"].values())))) softmaxs.append(np.argmax(np.array(list(sample["softmax_over_choices"].values()))))
......
...@@ -443,6 +443,20 @@ def multi_choice_accuracy(prediction, reference, **kwargs): ...@@ -443,6 +443,20 @@ def multi_choice_accuracy(prediction, reference, **kwargs):
return score return score
def accuracy_by_options(question, prediction, reference):
    """Score a prediction that spells out an option's text instead of its letter.

    Option lines of the form ``"<LETTER>. <text>"`` (one uppercase letter, a
    dot, a space, then the rest of the line) are extracted from ``question``.
    The first paragraph of ``prediction`` (everything before the first blank
    line) is taken as the answer and compared with the text of the option
    whose letter equals ``reference``.

    Surrounding whitespace is ignored on both the answer and the option text,
    so a leading space, a trailing newline, leading blank lines, or a carriage
    return left on an option line does not turn a correct answer into a miss.

    Args:
        question: Prompt text containing the lettered options.
        prediction: Raw text produced by the model.
        reference: Letter of the correct option, e.g. ``"B"``.

    Returns:
        int: 1 if the answer equals the text of the referenced option,
        otherwise 0. An empty answer always scores 0.
    """
    # Trim first so leading blank lines are not mistaken for an empty answer.
    answer = prediction.strip().split("\n\n", 1)[0].strip()
    if not answer:
        return 0

    for option in re.findall(r"[A-Z]\. [^\n]+", question):
        choice, content = option.split(". ", 1)
        if choice == reference and content.strip() == answer:
            return 1
    return 0
def combined_single_choice_accuracy(prediction, reference, **kwargs): def combined_single_choice_accuracy(prediction, reference, **kwargs):
return single_choice_accuracy(prediction, reference, **kwargs) return single_choice_accuracy(prediction, reference, **kwargs)
......
...@@ -96,7 +96,7 @@ class HuggingFaceModel(BaseModel): ...@@ -96,7 +96,7 @@ class HuggingFaceModel(BaseModel):
self.logger.warning("pad_token_id is not set for the tokenizer. " "Using eos_token_id as pad_token_id.") self.logger.warning("pad_token_id is not set for the tokenizer. " "Using eos_token_id as pad_token_id.")
if self.tokenizer.eos_token: if self.tokenizer.eos_token:
self.tokenizer.pad_token = self.tokenizer.eos_token self.tokenizer.pad_token = self.tokenizer.eos_token
elif self.tokenizer.eod_id: elif hasattr(self.tokenizer, "eod_id"):
# Qwen has an eod token "<|endoftext|>". # Qwen has an eod token "<|endoftext|>".
self.tokenizer.pad_token_id = self.tokenizer.eod_id self.tokenizer.pad_token_id = self.tokenizer.eod_id
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment