Commit f7db21eb authored by lvzhen

first
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

GLOBAL_BATCH_SIZE = 8
MICRO_BATCH_SIZE = 1


def get_train_ds_config(offload, stage=2, precision="fp16"):
    device = "cpu" if offload else "none"
    zero_opt_dict = {
        "stage": stage,
        "offload_param": {
            "device": device
        },
        "offload_optimizer": {
            "device": device
        },
        "stage3_param_persistence_threshold": 1e4,
        "stage3_max_live_parameters": 3e7,
        "stage3_prefetch_bucket_size": 3e7,
    }
    ds_config = {
        "train_batch_size": GLOBAL_BATCH_SIZE,
        "train_micro_batch_size_per_gpu": MICRO_BATCH_SIZE,
        "steps_per_print": 1,
        "zero_optimization": zero_opt_dict,
        "gradient_clipping": 1.0,
        "prescale_gradients": False,
        "wall_clock_breakdown": False,
        "checkpoint": {
            "use_node_local_storage": True
        }
    }
    if precision == "fp16":
        ds_config["fp16"] = {
            "enabled": True,
            "loss_scale": 0,
            "loss_scale_window": 500,
            "hysteresis": 2,
            "min_loss_scale": 1,
            "initial_scale_power": 12
        }
    elif precision == "bf16":
        ds_config["bf16"] = {"enabled": True}
    else:
        raise ValueError("precision must be either 'fp16' or 'bf16'")
    return ds_config
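
# Example usage (a minimal sketch; the argument values below are illustrative,
# not taken from the training scripts in this repo):
#   ds_config = get_train_ds_config(offload=True, stage=3, precision="bf16")
#   engine, optimizer, _, scheduler = deepspeed.initialize(
#       model=model, optimizer=optimizer, config=ds_config, ...)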
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

import math
import torch
from torch import nn
import torch.nn.functional as F
from deepspeed.compression.helper import recursive_getattr, recursive_setattr
import deepspeed


class LinearLayer_LoRA(nn.Module):
    # a simple implementation of LoRA; for now only nn.Linear is supported

    def __init__(self,
                 weight,
                 lora_dim=0,
                 lora_scaling=1,
                 lora_dropout=0,
                 bias=None):
        super(LinearLayer_LoRA, self).__init__()
        self.weight = weight
        self.bias = bias

        if lora_dim <= 0:
            raise ValueError(
                "You are trying to use LoRA; its reduced dimension (lora_dim) must be at least 1"
            )

        try:
            # under ZeRO stage 3 the real shape lives in ds_shape
            rows, columns = weight.ds_shape
        except AttributeError:
            rows, columns = weight.shape
        self.lora_right_weight = nn.Parameter(torch.zeros(
            columns,
            lora_dim))  # stored transposed so forward needs no extra transpose
        self.lora_left_weight = nn.Parameter(torch.zeros(lora_dim, rows))
        self.lora_scaling = lora_scaling / lora_dim

        if lora_dropout > 0:
            self.lora_dropout = nn.Dropout(lora_dropout)
        else:
            self.lora_dropout = nn.Identity()

        self.reset_parameters()
        # disable the original weight's gradient
        self.weight.requires_grad = False
        # whether LoRA has been fused into the original weight
        self.fuse_lora = False

    def eval(self):
        self.lora_dropout.eval()

    def train(self, mode=True):
        self.lora_dropout.train(mode)

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.lora_right_weight, a=math.sqrt(5))
        nn.init.zeros_(self.lora_left_weight)

    def fuse_lora_weight(self):
        if not self.fuse_lora:
            self.weight.data += self.lora_scaling * torch.matmul(
                self.lora_left_weight.t(), self.lora_right_weight.t())
        self.fuse_lora = True

    def unfuse_lora_weight(self):
        if self.fuse_lora:
            self.weight.data -= self.lora_scaling * torch.matmul(
                self.lora_left_weight.t(), self.lora_right_weight.t())
        self.fuse_lora = False

    def forward(self, input):
        if self.fuse_lora:
            return F.linear(input, self.weight, self.bias)
        else:
            return F.linear(
                input, self.weight,
                self.bias) + (self.lora_dropout(input) @ self.lora_right_weight
                              @ self.lora_left_weight) * self.lora_scaling


# convert matching linear layers to LoRA layers
def convert_linear_layer_to_lora(model,
                                 part_module_name,
                                 lora_dim=0,
                                 lora_scaling=1,
                                 lora_dropout=0):
    replace_name = []
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear) and part_module_name in name:
            replace_name.append(name)
    for name in replace_name:
        module = recursive_getattr(model, name)
        tmp = LinearLayer_LoRA(
            module.weight, lora_dim, lora_scaling, lora_dropout,
            module.bias).to(module.weight.device).to(module.weight.dtype)
        recursive_setattr(model, name, tmp)
    return model


def mark_only_lora_as_trainable(model: nn.Module, bias: str = 'none') -> None:
    for n, p in model.named_parameters():
        if 'lora_' not in n:
            p.requires_grad = False
    if bias == 'none':
        return
    elif bias == 'all':
        for n, p in model.named_parameters():
            if 'bias' in n:
                p.requires_grad = True
    elif bias == 'lora_only':
        for m in model.modules():
            if isinstance(m, LinearLayer_LoRA) and \
                    hasattr(m, 'bias') and \
                    m.bias is not None:
                m.bias.requires_grad = True
    else:
        raise NotImplementedError


def _z3_params_to_fetch(param_list):
    return [
        p for p in param_list
        if hasattr(p, 'ds_id') and p.ds_status == deepspeed.runtime.zero.
        partition_parameters.ZeroParamStatus.NOT_AVAILABLE
    ]


# convert the LoRA layers back to plain linear layers (fuse the deltas)
def convert_lora_to_linear_layer(model):
    replace_name = []
    for name, module in model.named_modules():
        if isinstance(module, LinearLayer_LoRA):
            replace_name.append(name)
    for name in replace_name:
        module = recursive_getattr(model, name)
        zero_stage_3 = hasattr(module.weight, 'ds_id')
        params_to_fetch = _z3_params_to_fetch([
            module.weight, module.bias, module.lora_left_weight,
            module.lora_right_weight
        ])
        with deepspeed.zero.GatheredParameters(params_to_fetch,
                                               modifier_rank=0,
                                               enabled=zero_stage_3):
            module.fuse_lora_weight()
    return model


def recover_lora(model):
    replace_name = []
    for name, module in model.named_modules():
        if isinstance(module, LinearLayer_LoRA):
            replace_name.append(name)
    for name in replace_name:
        module = recursive_getattr(model, name)
        zero_stage_3 = hasattr(module.weight, 'ds_id')
        params_to_fetch = _z3_params_to_fetch([
            module.weight, module.bias, module.lora_left_weight,
            module.lora_right_weight
        ])
        with deepspeed.zero.GatheredParameters(params_to_fetch,
                                               modifier_rank=0,
                                               enabled=zero_stage_3):
            module.unfuse_lora_weight()
    return model


def only_optimize_lora_parameters(model):
    # turn off gradients for all parameters except the LoRA parameters
    for name, param in model.named_parameters():
        if "lora_right_weight" in name or "lora_left_weight" in name:
            param.requires_grad = True
        else:
            param.requires_grad = False
    return model


def make_model_gradient_checkpointing_compatible(model):
    # Huggingface added enable_input_require_grads to make gradient
    # checkpointing work for LoRA-only optimization
    if hasattr(model, "enable_input_require_grads"):
        model.enable_input_require_grads()
    elif hasattr(model, "get_input_embeddings"):

        def make_inputs_require_grad(module, input, output):
            output.requires_grad_(True)

        model.get_input_embeddings().register_forward_hook(
            make_inputs_require_grad)
    return model
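
# Example usage (a minimal sketch; which linear layers to wrap depends on the
# model, and "query_key_value" below is only an illustration):
#   model = convert_linear_layer_to_lora(model, "query_key_value", lora_dim=8)
#   model = only_optimize_lora_parameters(model)
#   ... train ...
#   model = convert_lora_to_linear_layer(model)  # fuse LoRA before saving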
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

import os
import torch
import random
import numpy as np
from transformers import set_seed, AutoTokenizer
import deepspeed
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
import re
import shutil


def print_rank_0(msg, rank=0):
    if rank <= 0:
        print(msg)


def to_device(batch, device):
    output = {}
    for k, v in batch.items():
        try:
            output[k] = v.to(device)
        except AttributeError:
            # non-tensor values are passed through unchanged
            output[k] = v
    return output


def get_dtype_size(dtype):
    if dtype == torch.bool:
        return 1 / 8
    bit_search = re.search(r"[^\d](\d+)$", str(dtype))
    if bit_search is None:
        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
    bit_size = int(bit_search.groups()[0])
    return bit_size // 8


def save_hf_format(model, tokenizer, args, sub_folder=""):
    # save a HuggingFace-format model that can be loaded with `from_pretrained`;
    # LoRA weights are stripped from the state dict
    model_to_save = model.module if hasattr(model, 'module') else model
    CONFIG_NAME = "config.json"
    output_dir = os.path.join(args.output_dir, sub_folder)
    os.makedirs(output_dir, exist_ok=True)
    output_config_file = os.path.join(output_dir, CONFIG_NAME)
    save_dict = model_to_save.state_dict()
    for key in list(save_dict.keys()):
        if "lora_" in key:
            del save_dict[key]
    model_to_save.save_pretrained(output_dir, state_dict=save_dict)
    model_to_save.config.to_json_file(output_config_file)
    tokenizer.save_pretrained(output_dir)
    # for models not covered by AutoModel, copy the custom python module files
    train_from_model_path = model_to_save.config._name_or_path
    if os.path.exists(train_from_model_path):
        for filename in os.listdir(train_from_model_path):
            if filename.endswith(".py"):
                shutil.copy(os.path.join(train_from_model_path, filename),
                            os.path.join(output_dir, filename))


def set_random_seed(seed):
    if seed is not None:
        set_seed(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)


def get_all_reduce_mean(tensor):
    torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM)
    tensor = tensor / torch.distributed.get_world_size()
    return tensor


def get_optimizer_grouped_parameters(model,
                                     weight_decay,
                                     no_decay_name_list=[
                                         "bias", "LayerNorm.weight"
                                     ]):
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if (not any(nd in n
                            for nd in no_decay_name_list) and p.requires_grad)
            ],
            "weight_decay": weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if (any(nd in n
                        for nd in no_decay_name_list) and p.requires_grad)
            ],
            "weight_decay": 0.0,
        },
    ]
    return optimizer_grouped_parameters
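
# Example usage (a minimal sketch; the optimizer choice and hyperparameters are
# illustrative, not prescribed by this file):
#   groups = get_optimizer_grouped_parameters(model, weight_decay=0.0001)
#   optimizer = torch.optim.AdamW(groups, lr=3e-5)
#   # with --offload, DeepSpeed's CPU Adam is the usual choice:
#   # from deepspeed.ops.adam import DeepSpeedCPUAdam
#   # optimizer = DeepSpeedCPUAdam(groups, lr=3e-5)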
def _z3_params_to_fetch(param_list):
    return [
        p for p in param_list
        if hasattr(p, 'ds_id') and p.ds_status == ZeroParamStatus.NOT_AVAILABLE
    ]


def save_zero_three_model(model, tokenizer, args, sub_folder=""):
    zero_stage_3 = (args.zero_stage == 3)
    os.makedirs(args.output_dir, exist_ok=True)
    if args.global_rank == 0:
        output_dir = os.path.join(args.output_dir, sub_folder)
        os.makedirs(output_dir, exist_ok=True)
    model_to_save = model.module if hasattr(model, 'module') else model
    if zero_stage_3:
        output_state_dict = {}
        for k, v in model_to_save.named_parameters():
            if hasattr(v, 'ds_id'):
                # gather the ZeRO-3 partitioned parameter before copying it
                with deepspeed.zero.GatheredParameters(_z3_params_to_fetch([v]),
                                                       enabled=zero_stage_3):
                    v_p = v.data.cpu()
            else:
                v_p = v.cpu()
            if args.global_rank == 0 and "lora" not in k:
                output_state_dict[k] = v_p
        if args.global_rank == 0:
            model_to_save.save_pretrained(output_dir,
                                          state_dict=output_state_dict)
        del output_state_dict
    if args.global_rank == 0:
        output_config_file = os.path.join(output_dir, "config.json")
        model_to_save.config.to_json_file(output_config_file)
        tokenizer.save_pretrained(output_dir)
        # for models not covered by AutoModel, copy the custom python module files
        train_from_model_path = model_to_save.config._name_or_path
        if os.path.exists(train_from_model_path):
            for filename in os.listdir(train_from_model_path):
                if filename.endswith(".py"):
                    shutil.copy(os.path.join(train_from_model_path, filename),
                                os.path.join(output_dir, filename))
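
# Example usage (a sketch; `args` is the training script's argparse namespace
# and "final" is an illustrative sub-folder name):
#   if args.zero_stage == 3:
#       save_zero_three_model(model, tokenizer, args, sub_folder="final")
#   else:
#       save_hf_format(model, tokenizer, args, sub_folder="final")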
# SFT Parameters and Notes

## SFT Parameters Explained

```shell
#!/bin/bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # which GPUs to use
OUTPUT=telechat-single-node-test # output path
ZERO_STAGE=3 # ZeRO stage
if [ "$OUTPUT" == "" ]; then
    OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
    ZERO_STAGE=3
fi
mkdir -p $OUTPUT
deepspeed --master_port 29500 main.py \ # port number
   --data_path ${DATA_OUTPUT_PATH}/train_data.pt \ # tokenized data file
   --model_name_or_path ../../models/12B \ # model path
   --with_loss_mask \ # whether to enable the loss mask
   --per_device_train_batch_size 1 \ # batch size per GPU
   --max_seq_len 4096 \ # training context length
   --learning_rate 3e-5 \ # learning rate
   --weight_decay 0.0001 \ # weight decay
   --num_train_epochs 1 \ # number of epochs
   --gradient_accumulation_steps 4 \ # gradient accumulation steps
   --lr_scheduler_type cosine \
   --precision fp16 \ # training precision: fp16 or bf16
   --warmup_proportion 0.1 \ # warm-up ratio
   --gradient_checkpointing \ # gradient checkpointing
   --offload \ # whether to enable CPU offload
   --seed 1233 \
   --zero_stage $ZERO_STAGE \
   --save_steps 10 \ # save interval in steps
   --deepspeed \
   --output_dir $OUTPUT # output path
```
## Notes

* Fine-tuning relies mainly on the ZeRO memory optimizations, whose stages shard progressively more training state evenly across GPUs: ZeRO-1 shards the optimizer states, ZeRO-2 shards the optimizer states and gradients, and ZeRO-3 shards the optimizer states, gradients, and model parameters.
* In addition, gradient checkpointing and CPU offload also help save GPU memory (CPU offload must be enabled together with ZeRO-3).
* global_batch_size = per_device_train_batch_size * number of GPUs * gradient_accumulation_steps. In the script above, global_batch_size = 1 * 8 * 4 = 32. save_steps counts global-batch steps, so the example above saves a checkpoint every 32 * 10 = 320 samples.
* with_loss_mask computes the loss only on the answer portion during training, which improves the quality of the model's responses (see the sketch after this list).
* precision selects fp16 or bf16 mixed-precision training.
* At training time, you can set flash-attn=true in the config.json under the model path to enable Flash Attention, which saves memory and speeds up training.
* With zero_stage=3, gradient_checkpointing enabled, and flash_attn=true, we measured that a single node with 8x A100-40G can train at 4096 context length, and two nodes with 16 GPUs can train at 8192.
* Saved models are in HuggingFace format and can be loaded directly for inference.
* LoRA adds low-rank matrices to the linear layers, greatly reducing the number of trainable parameters. lora_dim is the rank of these matrices (lora_dim=8 is a relatively good setting), lora_module_name selects which linear layers receive LoRA, and mark_only_lora_as_trainable controls whether gradients are computed only on the LoRA-augmented layers.
* Taking 7B as an example, you can enable Flash Attention in models/7B/config.json (it significantly reduces memory use and speeds up training), but Flash Attention does not support the Tesla V100 architecture. When training on V100, set **flash_attn** to **false** in config.json.
* To restrict which GPUs are used, change **export CUDA_VISIBLE_DEVICES**.
* For multi-node full-parameter fine-tuning, make sure the nodes can reach each other, and that code and data are identical on every node, both in content and in location.
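
A minimal sketch of what the loss mask does (illustrative only; the actual masking lives in the training code invoked by main.py, and the token ids below are made up): prompt positions in the label tensor are set to -100, the ignore_index of cross-entropy, so only answer tokens contribute to the loss.

```python
import torch
import torch.nn.functional as F

# Toy example: 6 tokens, the answer starting at position 3 (hypothetical values).
input_ids = torch.tensor([[101, 102, 103, 104, 105, 106]])
answer_start = 3

labels = input_ids.clone()
labels[:, :answer_start] = -100  # prompt tokens are ignored by the loss

# Standard causal-LM shift: predict token t+1 from tokens <= t.
vocab_size = 32000
logits = torch.randn(1, input_ids.size(1), vocab_size)  # stand-in for model output
loss = F.cross_entropy(logits[:, :-1].reshape(-1, vocab_size),
                       labels[:, 1:].reshape(-1),
                       ignore_index=-100)
print(loss)
```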
# Quick Start

This tutorial is intended to help users quickly deploy and develop with the Telechat model. It mainly covers:

## Getting the Resources

### Image Download

To help you get started quickly, we provide a ready-to-run environment image. Download: [image download](https://cloud.189.cn/t/EbAriaQfa2mm) (access code: 2uik)

### Model Weights Download

| Model version | Download link |
|---------| ----------------- |
| 7B-FP16 | [TeleChat-FP16](https://huggingface.co/Tele-AI/Telechat-7B) |
| 7B-int8 | [TeleChat-int8](https://huggingface.co/Tele-AI/Telechat-7B-int8) |
| 7B-int4 | [TeleChat-int4](https://huggingface.co/Tele-AI/Telechat-7B-int4) |
## Environment Setup

### Developing with the Image

**The tutorial uses version 1.2 as an example; adjust the version number for later releases.**

Download the image and decompress it to obtain the tar file.

![image tar package](../images/镜像tar包.png)

Load the image:

```shell
sudo docker load -i telechat-public_1.2.tar
```

![image import](../images/镜像导入过程.png)

Start the container. NVIDIA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 mounts the eight GPUs numbered 0-7; adjust this to your machine.

```shell
sudo docker run -itd --name telechat --runtime=nvidia --shm-size=256g -e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 telechat-public:1.2 bash
```

![container start](../images/容器启动.png)

Copy the code and model into the container:
- e.g. copy the local model folder telechat_opensource_7B_huggingface into /home of the telechat container
- e.g. copy the code folder Telechat into /home of the telechat container

```shell
sudo docker cp telechat_opensource_7B_huggingface telechat:/home/. && sudo docker cp TeleChat telechat:/home/.
```

Enter the container:

```shell
sudo docker exec -it telechat bash
```

![mount model and code](../images/挂载模型代码.png)
## Model Inference

Enter Telechat/inference_telechat:

```shell
python3 telechat_infer_demo.py
```

![direct inference result](../images/直接推理结果.png)

### Long-Context Extrapolation

By combining NTK-aware extrapolation with attention scaling, a model fine-tuned at 8K length can be extrapolated to 96K length at inference time. The table below shows the perplexity of TeleChat-7B on wikipedia data of various lengths; with NTK-aware extrapolation and attention scaling applied together, TeleChat still achieves low perplexity at a 96K inference length.
| Method | 2048 | 4096 | 8192 | 16384 | 32768 | 65536 | 98304 |
| ---------------------------------- | ------ | ------ | ------- | ------- | -------- | -------- | -------- |
| baseline | 4.8122 | 4.6562 | 39.3099 | 98.3102 | 155.2708 | 487.3398 | 447.6295 |
| NTK-aware (8k) | 4.8122 | 4.6562 | 5.1904 | 4.7155 | 8.6351 | 77.7478 | 79.9256 |
| NTK-aware+attention scaling (8k) | 4.8122 | 4.6562 | 5.1904 | 4.0353 | 4.1408 | 9.4080 | 7.9711 |
| NTK-aware (16k) | 7.6916 | 7.9900 | 7.9580 | 5.1217 | 4.7932 | 10.5444 | 10.3614 |
| NTK-aware+attention scaling (16k) | 7.6916 | 7.9900 | 7.9580 | 5.1217 | 4.7195 | 8.9751 | 7.6822 |
You can also fine-tune TeleChat at longer lengths for stronger extrapolation. After fine-tuning, simply **set the `training_seqlen` field in the model's `config.json` to the sequence length used during fine-tuning** before running inference. Rows 4 and 5 of the table show the perplexities after fine-tuning TeleChat-7B at 16K length; perplexity is lower at inference lengths of 64K and above.
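
A minimal sketch of that config edit (the checkpoint path and sequence length below are illustrative):

```python
import json

# Hypothetical checkpoint path; point this at your fine-tuned model.
config_path = "../../models/7B/config.json"

with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

# Set training_seqlen to the sequence length used during fine-tuning.
config["training_seqlen"] = 16384

with open(config_path, "w", encoding="utf-8") as f:
    json.dump(config, f, ensure_ascii=False, indent=2)
```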
## Model Fine-Tuning

Fine-tuning comes in two flavors: full-parameter fine-tuning and LoRA fine-tuning.

### LoRA Fine-Tuning

**Enter the `deepspeed-telechat/sft` directory** and run the following command to start DeepSpeed-based LoRA fine-tuning.

```shell
bash run_telechat_lora.sh
```

### Full-Parameter Fine-Tuning

**Enter the `deepspeed-telechat/sft` directory** and run the following command to start DeepSpeed-based full-parameter fine-tuning.

Single-node script:

```shell
bash run_telechat_single_node.sh
```

### Inference Test After Fine-Tuning

**Enter the `inference_telechat/` directory**, set PATH in telechat_infer_demo.py to the model path saved in the previous step, then run the following command to run inference:

```shell
python telechat_infer_demo.py
```

## Model Quantization

### GPTQ Quantization

Enter Telechat/quant:

```shell
python quant.py
```

![quantization result](../images/量化结果.png)

### Quantized Inference

Run inference:

```shell
python telechat_quantized_infer_demo.py
```

![quantized inference result](../images/量化推理结果.png)

## Serving

We provide two simple **single-concurrency** serving options, both with streaming support.

### API

Enter the service folder:

```shell
python telechat_service.py
```

![API](../images/api页面.png)

By default the telechat service starts on 0.0.0.0:8070; you can test it with test_json.py and test_stream.py.
To access the service from another machine, replace 0.0.0.0 with the server's IP.
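
A minimal request sketch, assuming the service accepts an HTTP POST with a JSON body (the route and field names below are placeholders, not taken from telechat_service.py; check test_json.py for the real schema):

```python
import requests

# Hypothetical endpoint and payload; consult test_json.py for the
# actual route and field names used by telechat_service.py.
url = "http://0.0.0.0:8070/telechat"
payload = {"question": "介绍下TeleChat"}

resp = requests.post(url, json=payload, timeout=60)
print(resp.json())
```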
### WEB

After deploying the API, run:

```shell
streamlit run webdemo.py
```

![WEB](../images/web页面.png)

By default the web demo serves on 0.0.0.0:8501.
To access it from another machine, replace 0.0.0.0 with the server's IP.
# Reproducing Benchmark Results

### CEVAL

First, download and unzip the CEVAL dataset:

```shell
wget https://huggingface.co/datasets/ceval/ceval-exam/resolve/main/ceval-exam.zip
unzip ceval-exam.zip
```

Then run the prediction script:

```shell
python score_CEVAL.py --path /path/to/ckpt --five_shot
```

This produces `submission.json`, which you can submit to the [CEVAL official website](https://cevalbenchmark.com/) for evaluation.

### MMLU

Dataset download: https://github.com/hendrycks/test?tab=readme-ov-file

Then run the prediction script:

```shell
python score_MMLU.py
```
import os
import re
import time
import torch
import json
import argparse
import pandas as pd
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
TASK2DESC = {
"high_school_physics": "高中物理",
"fire_engineer": "注册消防工程师",
"computer_network": "计算机网络",
"advanced_mathematics": "高等数学",
"logic": "逻辑学",
"middle_school_physics": "初中物理",
"clinical_medicine": "临床医学",
"probability_and_statistics": "概率统计",
"ideological_and_moral_cultivation": "思想道德修养与法律基础",
"operating_system": "操作系统",
"middle_school_mathematics": "初中数学",
"chinese_language_and_literature": "中国语言文学",
"electrical_engineer": "注册电气工程师",
"business_administration": "工商管理",
"high_school_geography": "高中地理",
"modern_chinese_history": "近代史纲要",
"legal_professional": "法律职业资格",
"middle_school_geography": "初中地理",
"middle_school_chemistry": "初中化学",
"high_school_biology": "高中生物",
"high_school_chemistry": "高中化学",
"physician": "医师资格",
"high_school_chinese": "高中语文",
"tax_accountant": "税务师",
"high_school_history": "高中历史",
"mao_zedong_thought": "毛泽东思想和中国特色社会主义理论概论",
"high_school_mathematics": "高中数学",
"professional_tour_guide": "导游资格",
"veterinary_medicine": "兽医学",
"environmental_impact_assessment_engineer": "环境影响评价工程师",
"basic_medicine": "基础医学",
"education_science": "教育学",
"urban_and_rural_planner": "注册城乡规划师",
"middle_school_biology": "初中生物",
"plant_protection": "植物保护",
"middle_school_history": "初中历史",
"high_school_politics": "高中政治",
"metrology_engineer": "注册计量师",
"art_studies": "艺术学",
"college_economics": "大学经济学",
"college_chemistry": "大学化学",
"law": "法学",
"sports_science": "体育学",
"civil_servant": "公务员",
"college_programming": "大学编程",
"middle_school_politics": "初中政治",
"teacher_qualification": "教师资格",
"computer_architecture": "计算机组成",
"college_physics": "大学物理",
"discrete_mathematics": "离散数学",
"marxism": "马克思主义基本原理",
"accountant": "注册会计师",
}
def build_example(question, A, B, C, D, answer="", with_answer: bool = True):
    choice = "\n".join([
        "A. " + A,
        "B. " + B,
        "C. " + C,
        "D. " + D,
    ])
    answer = answer.strip().upper() if with_answer else ""
    return f"{question}\n{choice}\n答案:{answer}"
def extract_answer_option(text):
    patterns = [
        r"答案是?\s?([ABCD])",
        r"答案是?\s?:([ABCD])",
        r"答案是?\s?:([ABCD])",
        r"答案应该?是\s?([ABCD])",
        r"答案应该?选\s?([ABCD])",
        r"答案为\s?([ABCD])",
        r"选择\s?([ABCD])",
        r"只有选?项?\s?([ABCD])\s?是?对",
        r"只有选?项?\s?([ABCD])\s?是?错",
        r"只有选?项?\s?([ABCD])\s?不?正确",
        r"只有选?项?\s?([ABCD])\s?错误",
        r"说法不?对选?项?的?是\s?([ABCD])",
        r"说法不?正确选?项?的?是\s?([ABCD])",
        r"说法错误选?项?的?是\s?([ABCD])",
        r"([ABCD])\s?是正确的",
        r"([ABCD])\s?是正确答案",
        r"选项\s?([ABCD])\s?正确",
        r"所以答\s?([ABCD])",
        r"1.\s?([ABCD])[.。$]?$",
        r"所以\s?([ABCD][.。$]?$)",
        r"所有\s?([ABCD][.。$]?$)",
        r"[\s,::,]([ABCD])[。,,\.]?$",
        r"[\s,,::][故即]([ABCD])[。\.]?$",
        r"[\s,,::]因此([ABCD])[。\.]?$",
        r"[是为。]\s?([ABCD])[。\.]?$",
        r"因此\s?([ABCD])[。\.]?$",
        r"显然\s?([ABCD])[。\.]?$",
        r"1.\s?(.*?)$",
        r"答案是\s?(\S+)(?:。|$)",
        r"答案应该是\s?(\S+)(?:。|$)",
        r"答案为\s?(\S+)(?:。|$)",
    ]
    regexes = [re.compile(pattern) for pattern in patterns]
    for regex in regexes:
        match = regex.search(text)
        if match:
            return match.group(1)
    # fall back to the first capital option letter anywhere in the text
    for i in text:
        if i in "ABCD":
            return i
    # default guess when nothing matches
    return "C"
def get_args():
    parser = argparse.ArgumentParser(
        'Evaluation',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    group = parser.add_argument_group('EVAL Task Parameters')
    group.add_argument('--five_shot', action="store_true")
    group.add_argument('--path', type=str, default=None)
    args = parser.parse_args()
    return args


args = get_args()
PATH = args.path
tokenizer = AutoTokenizer.from_pretrained(PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(PATH, trust_remote_code=True,
                                             device_map="auto",
                                             torch_dtype=torch.float16)
model.eval()
submit_dict = {}
filenames = os.listdir("test")
subject_list = [test_file.replace("_test.csv", "") for test_file in filenames]
# Five-shot examples (college_economics) shared across all subjects;
# hoisted out of the evaluation loop since the prompt never changes.
five_shot_prompt = '''<_user>以下是中国关于大学经济学考试的单项选择题,请从A、B、C、D四个选项中选出其中的正确答案。\n问题:考虑以下小型开放经济的数据:Y=1000,C=700,G=150,I=250-1000r*。如果世界利率为5%,那么小型开放经济的净出口为____。\nA. 50\nB. -50\nC. 150\nD. -150<_bot>答案: B<_end><_user>问题:已知一垄断企业成本函数为TC=5Q2+20Q+10,产品的需求函数为Q=140-P。利润最大化的产量为____。]\nA. 10\nB. 5\nC. 3\nD. 15<_bot>答案: A<_end><_user>问题:如果消费与实际利率负相关,则______。\nA. IS曲线更平坦\nB. IS曲线更陡峭\nC. LM曲线更平坦\nD. LM曲线更陡峭<_bot>答案: A<_end>问题:如果所有产品的生产都增加了10%,且所有价格都下降了10%,会发生以下哪一种情况?____\nA. 真实GDP增加10%,名义GDP减少10%\nB. 真实GDP增加10%,名义GDP不变\nC. 真实GDP不变,名义GDP增加10%\nD. 真实GDP不变,名义GDP减少10%<_bot>答案: B<_end><_user>问题:如果边际技术替代率MRTSLK小于劳动与资本的价格之比,为使成本最小,该厂商应该____。\nA. 同时增加资本和劳动\nB. 同时减少资本和劳动\nC. 减少资本,增加劳动\nD. 增加资本,减少劳动<_bot>答案: D<_end>'''

for index, subject_name in enumerate(subject_list):
    submit_dict[subject_name] = {}
    test_file_path = os.path.join('test', f'{subject_name}_test.csv')
    test_df = pd.read_csv(test_file_path)
    for idx in tqdm(range(len(test_df))):
        id = test_df["id"][idx]
        question = test_df["question"][idx]
        choice_A = test_df["A"][idx]
        choice_B = test_df["B"][idx]
        choice_C = test_df["C"][idx]
        choice_D = test_df["D"][idx]
        prompt = f"以下是中国关于{TASK2DESC[subject_name]}考试的单项选择题,请选出其中的正确答案。\n\n"
        prompt += build_example(question, choice_A, choice_B, choice_C,
                                choice_D, with_answer=False)
        if args.five_shot:
            prompt = five_shot_prompt + "<_user>" + prompt + "<_bot>"
        else:
            prompt = "<_user>" + prompt + "<_bot>"
        context_ids = tokenizer(prompt, return_tensors="pt")
        output = model.generate(context_ids["input_ids"].to(0),
                                max_new_tokens=2000, temperature=0.2,
                                top_p=0.95, repetition_penalty=1.0,
                                do_sample=False,
                                eos_token_id=[160133, 160130])
        output_str = tokenizer.decode(output[0].tolist()).split("<_bot>")[-1]
        answer_extracted = extract_answer_option(output_str)
        submit_dict[subject_name][str(id)] = answer_extracted

with open("submission.json", 'w', encoding='utf-8') as f:
    json.dump(submit_dict, f, ensure_ascii=False, indent=4)
import os
import re
import torch
import pandas as pd
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

os.environ["CUDA_VISIBLE_DEVICES"] = '0'
PATH = "../models/7B"  # checkpoint path
mmlu_dataset = "../mmlu/"  # dataset path

def get_few_shot_prompt(filename):
    filepath = os.path.join(mmlu_dataset, "dev", filename.replace("test", "dev"))
    df = pd.read_csv(filepath, header=None,
                     names=["input", "A", "B", "C", "D", "answer"])
    prompts = []
    _hint = f'There is a single choice question about {filename.replace("_test.csv", " ")}. Answer the question by replying A, B, C or D.'
    for i in range(len(df)):
        line = df.iloc[i]
        user_content = f"{_hint}\nQuestion: {line['input']}\nA. {line['A']}\nB. {line['B']}\nC. {line['C']}\nD. {line['D']}\nAnswer: "
        bot_content = f"{line[line['answer']]}\n"
        prompts.extend([{"role": "user", "content": user_content},
                        {"role": "bot", "content": bot_content}])
    return prompts[:10]


# build the test inputs
def get_input_data(test_file_path, filename):
    _hint = f'There is a single choice question about {filename.replace("_test.csv", " ")}. Answer the question by replying A, B, C or D.'
    df = pd.read_csv(os.path.join(test_file_path, filename), header=None,
                     names=["input", "A", "B", "C", "D", "answer"])
    data = []
    for i in range(len(df)):
        line = df.iloc[i]
        input = f"{_hint}\nQuestion: {line['input']}\nA. {line['A']}\nB. {line['B']}\nC. {line['C']}\nD. {line['D']}\nAnswer: "
        target = line["answer"]
        data.append({"input": input, "target": target})
    return data


# post-process: extract the option letter from the model's answer
def get_capital_answer(text):
    patterns = [
        r"the answer is ([A-E])",
        r"the answer is([A-E])",
        r"Answer: ([A-E])",
        r"Answer: \(([A-E])\)",
        r"Option \(([A-E])\)",
        r"Answer:([A-E])",
        r"Option ([A-E])",
        r"Opt ([A-E])"
    ]
    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group(1)
    match = re.findall("[A-D]", text)
    if match:
        return match[0]
    return ""

if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained(PATH)
    model = AutoModelForCausalLM.from_pretrained(PATH, trust_remote_code=True,
                                                 device_map="auto",
                                                 torch_dtype=torch.float16)
    generate_config = GenerationConfig.from_pretrained(PATH)
    generate_config.temperature = 0.1
    generate_config.top_k = 50
    generate_config.top_p = 0.95
    print(generate_config)
    model.eval()
    test_file_path = mmlu_dataset + "test"
    filenames = os.listdir(test_file_path)
    score_list = []
    for filename in filenames:
        score, total = 0, 0
        few_shot_prompt = get_few_shot_prompt(filename)
        input_list = get_input_data(test_file_path, filename)
        for line in tqdm(input_list):
            answer, history = model.chat(tokenizer=tokenizer,
                                         question=line["input"],
                                         history=few_shot_prompt,
                                         generation_config=generate_config,
                                         stream=False)
            answer = get_capital_answer(answer)
            if answer == line["target"]:
                score += 1
            total += 1
        score_list.append(score / total)
    final_score = sum(score_list) / len(score_list)
    print(final_score)
{"input": "提供一份优秀的演讲模板。"}
{"input": "描述一片自然景观,例如海滩或山脉。"}
{"input": "设计一份高质量的舞蹈表演节目,包括音乐、服装、舞蹈编排。"}
{"input": "如果一个石头从8米高的悬崖上落下,它将在多长时间内落地?(不考虑空气阻力)"}
{"input": "什么是量子力学?为什么它很重要?"}
{"input": "说明如何在Java中实现一个简单的数据库连接池。"}
{"input": "提供几条实用的时尚搭配建议,例如如何穿出不俗的商务装扮以及如何配饰一身休闲装。"}
{"input": "揭示航天器在太空中运行的基本原理和过程。"}
{"input": "想出一个关于人生的格言,这个格言应该启发他人,包含智慧和洞察。"}
{"input": "岩手县的公共交通如何发展?"}
{"input": "建议加强家庭关系的方法和技巧"}
{"input": "是不是所有的鱼都能呼吸空气?"}
{"input": "如果把鸡蛋煮到牛奶里,可以煮出牛奶蛋吗?"}
{"input": "生成一段对于如何训练宠物成为嗅探犬的详细说明性文本。"}
{"input": "解释一下Oscar Awards的历史。"}
{"input": "根据以下条件对商品进行分类:价格小于等于100元,则为A类;价格在101~500元之间,则为B类;价格大于500元,则为C类。\n商品列表:[{name: 'book', price: 50}, {name: 'bag', price: 200}, {name: 'TV', price: 800}]"}
{"input": "提供一份关于如何防火的文档。"}
{"input": "如何提高房屋的增值潜力?"}
{"input": "编写一篇描绘性专栏文章的作品。"}
{"input": "在知名论坛上发布一篇关于城市化的文章,阐述城市化对人口、环境和经济的影响。"}
{"input": "为什么有些人会感到孤独?该如何摆脱孤独?"}
{"input": "指点一个菜鸟驾驶员如何开车上高速公路。"}
{"input": "手机使用时附近有火药味会导致手机爆炸,这种说法正确吗?"}
{"input": "描述一幢建筑的外观和内部布局。"}
{"input": "描述一个理想的家具布置方案,以最大程度地利用空间。"}
{"input": "你是哪个团队开发的?"}
{"input": "你跟中国电信是什么关系?"}
{"input": "介绍下TeleChat"}
{"input": "介绍下自己"}
{"input": "你好"}
{"input": "谁给你起的名字"}
{"input": "电信主卡和副卡的区别在哪里?"}
{"input": "说一下电信5G使用场景有几种场景?"}
{"input": "那你会什么"}
{"input": "英语你会吗"}
{"input": "撰写一篇以《提高自主创新》为标题的申论,字数在700左右"}
{"input": "请以《发展》为题,写一篇1000字的申论文章"}
{"input": "给我写一个快速排序的代码,用java"}
{"input": "请编写一个Java程序,在屏幕上输出从1到10的所有整数。"}
{"input": "请编写一段代码来检查一个字符串是否为回文字符串。\n如:'racecar'"}
{"input": "小明买了10支笔和6个橡皮,如果每支笔5元,\n\n每个橡皮2元,他一共花了多少元?"}
{"input": "人们因技术发展得以更好地掌控时间,但也有人因此成了时间的仆人。这句话引发了你怎样的联想与思考?请写一篇文章。\n要求:选准角度,确定立意,明确文体,自拟标题;不要套作,不得抄袭;不得泄露个人信息;不少于800字。"}
{"input": "评价一下姚明"}
{"input": "我是一名老师,帮忙生成一个主题为“好好学习,积极向上”的朋友圈短文案,要求内容幽默风趣不死板。"}
{"input": "设计一个时尚品牌的标志,符合品牌价值观和目标受众。"}
{"input": "说明如何创造一个双人浪漫的晚餐。"}
{"input": "编写一份公司年度财务报表"}
{"input": "解释什么是汽车发动机机油,机油的作用和更换周期。"}
{"input": "为减轻颈椎病症状,提供五个有效养生方法,并解释其机理。"}
{"input": "针对失眠人群提出五个改善方法。"}