Commit 515b9245 authored by PanZezhong's avatar PanZezhong
Browse files

feat: add ceval test script

parent ddea3d19
import sys
from jiuge import *
from datasets import load_dataset
class JiugeForCeval(JiugeForCauslLM):
def __init__(
self, model_dir_path, device=DeviceType.DEVICE_TYPE_CPU, ndev=1, max_tokens=None
):
super().__init__(model_dir_path, device, ndev, max_tokens)
pass
def generate(self, conversation, max_steps, topp_=1.0, topk_=1, temperature_=1.0):
input_content = (
self.tokenizer.apply_chat_template(
conversation=conversation,
add_generation_prompt=True,
tokenize=False,
)
+ "正确答案是"
)
print(input_content, end="", flush=True)
tokens = self.tokenizer.encode(input_content)
infer_task = InferTask(
0,
tokens,
self.max_context_len(),
temperature_,
topk_,
topp_,
self.eos_token_id,
)
infer_task.bind_kvcache(KVCache(self))
steps = 0
total_time = 0
output_content = ""
for step_i in range(max_steps):
start_time = time.time()
output_tokens = self.batch_infer_one_round([infer_task])
end_time = time.time()
steps += 1
output_str = (
self.tokenizer._tokenizer.id_to_token(output_tokens[0])
.replace("▁", " ")
.replace("<0x0A>", "\n")
)
output_content += output_str
print(output_str, end="", flush=True)
if output_tokens[0] in self.eos_token_id:
break
infer_task.next(output_tokens[0])
if step_i > 0:
total_time += end_time - start_time
print("\n")
avg_time = total_time * 1000 / (steps - 1 + 1e-9)
print(f"Time per step: {avg_time:.3f}ms")
infer_task._kv_cache.drop(self)
return output_content, avg_time
def test():
if len(sys.argv) < 3:
print(
"Usage: python test_ceval.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] <path/to/model_dir> [n_device]"
)
sys.exit(1)
model_path = sys.argv[2]
device_type = DeviceType.DEVICE_TYPE_CPU
if sys.argv[1] == "--cpu":
device_type = DeviceType.DEVICE_TYPE_CPU
elif sys.argv[1] == "--nvidia":
device_type = DeviceType.DEVICE_TYPE_NVIDIA
elif sys.argv[1] == "--cambricon":
device_type = DeviceType.DEVICE_TYPE_CAMBRICON
elif sys.argv[1] == "--ascend":
device_type = DeviceType.DEVICE_TYPE_ASCEND
elif sys.argv[1] == "--metax":
device_type = DeviceType.DEVICE_TYPE_METAX
elif sys.argv[1] == "--moore":
device_type = DeviceType.DEVICE_TYPE_MOORE
elif sys.argv[1] == "--iluvatar":
device_type = DeviceType.DEVICE_TYPE_ILUVATAR
elif sys.argv[1] == "--kunlun":
device_type = DeviceType.DEVICE_TYPE_KUNLUN
elif sys.argv[1] == "--hygon":
device_type = DeviceType.DEVICE_TYPE_HYGON
else:
print(
"Usage: python test_ceval.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] <path/to/model_dir> [n_device]"
)
sys.exit(1)
# https://huggingface.co/datasets/ceval/ceval-exam/tree/main/middle_school_geography
dataset = load_dataset(r"ceval/ceval-exam", name="middle_school_mathematics")
# dataset = load_dataset(r"ceval/ceval-exam", name="high_school_history")
# dataset = load_dataset(r"ceval/ceval-exam", name="high_school_chinese")
# dataset = load_dataset(r"ceval/ceval-exam", name="high_school_physics")
# dataset = load_dataset(r"ceval/ceval-exam", name="middle_school_geography")
# dataset = load_dataset(r"ceval/ceval-exam", name="middle_school_physics")
samples = dataset["val"]
ndev = int(sys.argv[3]) if len(sys.argv) > 3 else 1
model = JiugeForCeval(model_path, device_type, ndev)
answers_list = []
for sample in samples:
input_content = f"'question':{sample['question']},'A': {sample['A']}, 'B':{sample['B']}, 'C': {sample['C']},'D': {sample['D']}。"
conversation = [
{
"role": "system",
"content": "请从question的A,B,C,D四个选项中选择正确的选项。例如,标准答案:A。",
},
{"role": "user", "content": input_content},
]
answer = sample["answer"]
output_content, avg_time = model.generate(
conversation, 500, topp_=1.0, topk_=1, temperature_=1.0
)
print("标准答案:", answer)
answers_list.append(
{"id": sample["id"], "output_content": output_content, "answer": answer}
)
model.destroy_model_instance()
print("-------------------------------------------------------------")
true_num = 0
all_num = 0
for cont in answers_list:
id = cont["id"]
output = cont["output_content"]
answer = cont["answer"]
all_num = all_num + 1
position = 0
ABCD = output[position : position + 2]
if answer in ABCD:
true_num = true_num + 1
print(f"id {id} : ", "正确")
else:
print(f"id {id}: ", "错误")
print(f"成绩: {true_num}/{all_num}", true_num / all_num)
if __name__ == "__main__":
test()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment