Commit f7db21eb authored by lvzhen

first
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

GLOBAL_BATCH_SIZE = 8
MICRO_BATCH_SIZE = 1


def get_train_ds_config(offload, stage=2, precision="fp16"):
    device = "cpu" if offload else "none"
    zero_opt_dict = {
        "stage": stage,
        "offload_param": {
            "device": device
        },
        "offload_optimizer": {
            "device": device
        },
        "stage3_param_persistence_threshold": 1e4,
        "stage3_max_live_parameters": 3e7,
        "stage3_prefetch_bucket_size": 3e7,
    }
    ds_config = {
        "train_batch_size": GLOBAL_BATCH_SIZE,
        "train_micro_batch_size_per_gpu": MICRO_BATCH_SIZE,
        "steps_per_print": 1,
        "zero_optimization": zero_opt_dict,
        "gradient_clipping": 1.0,
        "prescale_gradients": False,
        "wall_clock_breakdown": False,
        "checkpoint": {
            "use_node_local_storage": True
        }
    }
    if precision == "fp16":
        ds_config["fp16"] = {
            "enabled": True,
            "loss_scale": 0,
            "loss_scale_window": 500,
            "hysteresis": 2,
            "min_loss_scale": 1,
            "initial_scale_power": 12
        }
    elif precision == "bf16":
        ds_config["bf16"] = {"enabled": True}
    else:
        raise ValueError("precision must be either 'fp16' or 'bf16'")
    return ds_config
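
# Example usage (a minimal sketch; the argument values below are illustrative,
# not taken from the training scripts in this repo):
#   ds_config = get_train_ds_config(offload=True, stage=3, precision="bf16")
#   engine, optimizer, _, scheduler = deepspeed.initialize(
#       model=model, optimizer=optimizer, config=ds_config, ...)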
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

import math
import torch
from torch import nn
import torch.nn.functional as F
from deepspeed.compression.helper import recursive_getattr, recursive_setattr
import deepspeed


class LinearLayer_LoRA(nn.Module):
    # a simple implementation of LoRA; for now only nn.Linear is supported

    def __init__(self,
                 weight,
                 lora_dim=0,
                 lora_scaling=1,
                 lora_dropout=0,
                 bias=None):
        super(LinearLayer_LoRA, self).__init__()
        self.weight = weight
        self.bias = bias

        if lora_dim <= 0:
            raise ValueError(
                "You are trying to use LoRA; its reduced dimension (lora_dim) must be at least 1"
            )

        try:
            # under ZeRO stage 3 the real shape lives in ds_shape
            rows, columns = weight.ds_shape
        except AttributeError:
            rows, columns = weight.shape
        self.lora_right_weight = nn.Parameter(torch.zeros(
            columns,
            lora_dim))  # stored transposed so forward needs no extra transpose
        self.lora_left_weight = nn.Parameter(torch.zeros(lora_dim, rows))
        self.lora_scaling = lora_scaling / lora_dim

        if lora_dropout > 0:
            self.lora_dropout = nn.Dropout(lora_dropout)
        else:
            self.lora_dropout = nn.Identity()

        self.reset_parameters()
        # disable the original weight's gradient
        self.weight.requires_grad = False
        # whether LoRA has been fused into the original weight
        self.fuse_lora = False

    def eval(self):
        self.lora_dropout.eval()

    def train(self, mode=True):
        self.lora_dropout.train(mode)

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.lora_right_weight, a=math.sqrt(5))
        nn.init.zeros_(self.lora_left_weight)

    def fuse_lora_weight(self):
        if not self.fuse_lora:
            self.weight.data += self.lora_scaling * torch.matmul(
                self.lora_left_weight.t(), self.lora_right_weight.t())
        self.fuse_lora = True

    def unfuse_lora_weight(self):
        if self.fuse_lora:
            self.weight.data -= self.lora_scaling * torch.matmul(
                self.lora_left_weight.t(), self.lora_right_weight.t())
        self.fuse_lora = False

    def forward(self, input):
        if self.fuse_lora:
            return F.linear(input, self.weight, self.bias)
        else:
            return F.linear(
                input, self.weight,
                self.bias) + (self.lora_dropout(input) @ self.lora_right_weight
                              @ self.lora_left_weight) * self.lora_scaling


# convert matching linear layers to LoRA layers
def convert_linear_layer_to_lora(model,
                                 part_module_name,
                                 lora_dim=0,
                                 lora_scaling=1,
                                 lora_dropout=0):
    replace_name = []
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear) and part_module_name in name:
            replace_name.append(name)
    for name in replace_name:
        module = recursive_getattr(model, name)
        tmp = LinearLayer_LoRA(
            module.weight, lora_dim, lora_scaling, lora_dropout,
            module.bias).to(module.weight.device).to(module.weight.dtype)
        recursive_setattr(model, name, tmp)
    return model


def mark_only_lora_as_trainable(model: nn.Module, bias: str = 'none') -> None:
    for n, p in model.named_parameters():
        if 'lora_' not in n:
            p.requires_grad = False
    if bias == 'none':
        return
    elif bias == 'all':
        for n, p in model.named_parameters():
            if 'bias' in n:
                p.requires_grad = True
    elif bias == 'lora_only':
        for m in model.modules():
            if isinstance(m, LinearLayer_LoRA) and \
                    hasattr(m, 'bias') and \
                    m.bias is not None:
                m.bias.requires_grad = True
    else:
        raise NotImplementedError


def _z3_params_to_fetch(param_list):
    return [
        p for p in param_list
        if hasattr(p, 'ds_id') and p.ds_status == deepspeed.runtime.zero.
        partition_parameters.ZeroParamStatus.NOT_AVAILABLE
    ]


# convert the LoRA layers back to plain linear layers (fuse the deltas)
def convert_lora_to_linear_layer(model):
    replace_name = []
    for name, module in model.named_modules():
        if isinstance(module, LinearLayer_LoRA):
            replace_name.append(name)
    for name in replace_name:
        module = recursive_getattr(model, name)
        zero_stage_3 = hasattr(module.weight, 'ds_id')
        params_to_fetch = _z3_params_to_fetch([
            module.weight, module.bias, module.lora_left_weight,
            module.lora_right_weight
        ])
        with deepspeed.zero.GatheredParameters(params_to_fetch,
                                               modifier_rank=0,
                                               enabled=zero_stage_3):
            module.fuse_lora_weight()
    return model


def recover_lora(model):
    replace_name = []
    for name, module in model.named_modules():
        if isinstance(module, LinearLayer_LoRA):
            replace_name.append(name)
    for name in replace_name:
        module = recursive_getattr(model, name)
        zero_stage_3 = hasattr(module.weight, 'ds_id')
        params_to_fetch = _z3_params_to_fetch([
            module.weight, module.bias, module.lora_left_weight,
            module.lora_right_weight
        ])
        with deepspeed.zero.GatheredParameters(params_to_fetch,
                                               modifier_rank=0,
                                               enabled=zero_stage_3):
            module.unfuse_lora_weight()
    return model


def only_optimize_lora_parameters(model):
    # turn off gradients for all parameters except the LoRA parameters
    for name, param in model.named_parameters():
        if "lora_right_weight" in name or "lora_left_weight" in name:
            param.requires_grad = True
        else:
            param.requires_grad = False
    return model


def make_model_gradient_checkpointing_compatible(model):
    # Huggingface added enable_input_require_grads to make gradient
    # checkpointing work for LoRA-only optimization
    if hasattr(model, "enable_input_require_grads"):
        model.enable_input_require_grads()
    elif hasattr(model, "get_input_embeddings"):

        def make_inputs_require_grad(module, input, output):
            output.requires_grad_(True)

        model.get_input_embeddings().register_forward_hook(
            make_inputs_require_grad)
    return model
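
# Example usage (a minimal sketch; which linear layers to wrap depends on the
# model, and "query_key_value" below is only an illustration):
#   model = convert_linear_layer_to_lora(model, "query_key_value", lora_dim=8)
#   model = only_optimize_lora_parameters(model)
#   ... train ...
#   model = convert_lora_to_linear_layer(model)  # fuse LoRA before saving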
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

import os
import torch
import random
import numpy as np
from transformers import set_seed, AutoTokenizer
import deepspeed
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
import re
import shutil


def print_rank_0(msg, rank=0):
    if rank <= 0:
        print(msg)


def to_device(batch, device):
    output = {}
    for k, v in batch.items():
        try:
            output[k] = v.to(device)
        except AttributeError:
            # non-tensor values are passed through unchanged
            output[k] = v
    return output


def get_dtype_size(dtype):
    if dtype == torch.bool:
        return 1 / 8
    bit_search = re.search(r"[^\d](\d+)$", str(dtype))
    if bit_search is None:
        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
    bit_size = int(bit_search.groups()[0])
    return bit_size // 8


def save_hf_format(model, tokenizer, args, sub_folder=""):
    # save a HuggingFace-format model that can be loaded with `from_pretrained`;
    # LoRA weights are stripped from the state dict
    model_to_save = model.module if hasattr(model, 'module') else model
    CONFIG_NAME = "config.json"
    output_dir = os.path.join(args.output_dir, sub_folder)
    os.makedirs(output_dir, exist_ok=True)
    output_config_file = os.path.join(output_dir, CONFIG_NAME)
    save_dict = model_to_save.state_dict()
    for key in list(save_dict.keys()):
        if "lora_" in key:
            del save_dict[key]
    model_to_save.save_pretrained(output_dir, state_dict=save_dict)
    model_to_save.config.to_json_file(output_config_file)
    tokenizer.save_pretrained(output_dir)
    # for models not covered by AutoModel, copy the custom python module files
    train_from_model_path = model_to_save.config._name_or_path
    if os.path.exists(train_from_model_path):
        for filename in os.listdir(train_from_model_path):
            if filename.endswith(".py"):
                shutil.copy(os.path.join(train_from_model_path, filename),
                            os.path.join(output_dir, filename))


def set_random_seed(seed):
    if seed is not None:
        set_seed(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)


def get_all_reduce_mean(tensor):
    torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM)
    tensor = tensor / torch.distributed.get_world_size()
    return tensor


def get_optimizer_grouped_parameters(model,
                                     weight_decay,
                                     no_decay_name_list=[
                                         "bias", "LayerNorm.weight"
                                     ]):
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if (not any(nd in n
                            for nd in no_decay_name_list) and p.requires_grad)
            ],
            "weight_decay": weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if (any(nd in n
                        for nd in no_decay_name_list) and p.requires_grad)
            ],
            "weight_decay": 0.0,
        },
    ]
    return optimizer_grouped_parameters
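
# Example usage (a minimal sketch; the optimizer choice and hyperparameters are
# illustrative, not prescribed by this file):
#   groups = get_optimizer_grouped_parameters(model, weight_decay=0.0001)
#   optimizer = torch.optim.AdamW(groups, lr=3e-5)
#   # with --offload, DeepSpeed's CPU Adam is the usual choice:
#   # from deepspeed.ops.adam import DeepSpeedCPUAdam
#   # optimizer = DeepSpeedCPUAdam(groups, lr=3e-5)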
def _z3_params_to_fetch(param_list):
    return [
        p for p in param_list
        if hasattr(p, 'ds_id') and p.ds_status == ZeroParamStatus.NOT_AVAILABLE
    ]


def save_zero_three_model(model, tokenizer, args, sub_folder=""):
    zero_stage_3 = (args.zero_stage == 3)
    os.makedirs(args.output_dir, exist_ok=True)
    if args.global_rank == 0:
        output_dir = os.path.join(args.output_dir, sub_folder)
        os.makedirs(output_dir, exist_ok=True)
    model_to_save = model.module if hasattr(model, 'module') else model
    if zero_stage_3:
        output_state_dict = {}
        for k, v in model_to_save.named_parameters():
            if hasattr(v, 'ds_id'):
                # gather the ZeRO-3 partitioned parameter before copying it
                with deepspeed.zero.GatheredParameters(_z3_params_to_fetch([v]),
                                                       enabled=zero_stage_3):
                    v_p = v.data.cpu()
            else:
                v_p = v.cpu()
            if args.global_rank == 0 and "lora" not in k:
                output_state_dict[k] = v_p
        if args.global_rank == 0:
            model_to_save.save_pretrained(output_dir,
                                          state_dict=output_state_dict)
        del output_state_dict
    if args.global_rank == 0:
        output_config_file = os.path.join(output_dir, "config.json")
        model_to_save.config.to_json_file(output_config_file)
        tokenizer.save_pretrained(output_dir)
        # for models not covered by AutoModel, copy the custom python module files
        train_from_model_path = model_to_save.config._name_or_path
        if os.path.exists(train_from_model_path):
            for filename in os.listdir(train_from_model_path):
                if filename.endswith(".py"):
                    shutil.copy(os.path.join(train_from_model_path, filename),
                                os.path.join(output_dir, filename))
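
# Example usage (a sketch; `args` is the training script's argparse namespace
# and "final" is an illustrative sub-folder name):
#   if args.zero_stage == 3:
#       save_zero_three_model(model, tokenizer, args, sub_folder="final")
#   else:
#       save_hf_format(model, tokenizer, args, sub_folder="final")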
# SFT Parameters and Notes

## SFT Parameters Explained

```shell
#!/bin/bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # which GPUs to use
OUTPUT=telechat-single-node-test # output path
ZERO_STAGE=3 # ZeRO stage
if [ "$OUTPUT" == "" ]; then
    OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
    ZERO_STAGE=3
fi
mkdir -p $OUTPUT
deepspeed --master_port 29500 main.py \ # port number
   --data_path ${DATA_OUTPUT_PATH}/train_data.pt \ # tokenized data file
   --model_name_or_path ../../models/12B \ # model path
   --with_loss_mask \ # whether to enable the loss mask
   --per_device_train_batch_size 1 \ # batch size per GPU
   --max_seq_len 4096 \ # training context length
   --learning_rate 3e-5 \ # learning rate
   --weight_decay 0.0001 \ # weight decay
   --num_train_epochs 1 \ # number of epochs
   --gradient_accumulation_steps 4 \ # gradient accumulation steps
   --lr_scheduler_type cosine \
   --precision fp16 \ # training precision: fp16 or bf16
   --warmup_proportion 0.1 \ # warm-up ratio
   --gradient_checkpointing \ # gradient checkpointing
   --offload \ # whether to enable CPU offload
   --seed 1233 \
   --zero_stage $ZERO_STAGE \
   --save_steps 10 \ # save interval in steps
   --deepspeed \
   --output_dir $OUTPUT # output path
```
## Notes

* Fine-tuning relies mainly on the ZeRO memory optimizations, whose stages shard progressively more training state evenly across GPUs: ZeRO-1 shards the optimizer states, ZeRO-2 shards the optimizer states and gradients, and ZeRO-3 shards the optimizer states, gradients, and model parameters.
* In addition, gradient checkpointing and CPU offload also help save GPU memory (CPU offload must be enabled together with ZeRO-3).
* global_batch_size = per_device_train_batch_size * number of GPUs * gradient_accumulation_steps. In the script above, global_batch_size = 1 * 8 * 4 = 32. save_steps counts global-batch steps, so the example above saves a checkpoint every 32 * 10 = 320 samples.
* with_loss_mask computes the loss only on the answer portion during training, which improves the quality of the model's responses (see the sketch after this list).
* precision selects fp16 or bf16 mixed-precision training.
* At training time, you can set flash-attn=true in the config.json under the model path to enable Flash Attention, which saves memory and speeds up training.
* With zero_stage=3, gradient_checkpointing enabled, and flash_attn=true, we measured that a single node with 8x A100-40G can train at 4096 context length, and two nodes with 16 GPUs can train at 8192.
* Saved models are in HuggingFace format and can be loaded directly for inference.
* LoRA adds low-rank matrices to the linear layers, greatly reducing the number of trainable parameters. lora_dim is the rank of these matrices (lora_dim=8 is a relatively good setting), lora_module_name selects which linear layers receive LoRA, and mark_only_lora_as_trainable controls whether gradients are computed only on the LoRA-augmented layers.
* Taking 7B as an example, you can enable Flash Attention in models/7B/config.json (it significantly reduces memory use and speeds up training), but Flash Attention does not support the Tesla V100 architecture. When training on V100, set **flash_attn** to **false** in config.json.
* To restrict which GPUs are used, change **export CUDA_VISIBLE_DEVICES**.
* For multi-node full-parameter fine-tuning, make sure the nodes can reach each other, and that code and data are identical on every node, both in content and in location.
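
A minimal sketch of what the loss mask does (illustrative only; the actual masking lives in the training code invoked by main.py, and the token ids below are made up): prompt positions in the label tensor are set to -100, the ignore_index of cross-entropy, so only answer tokens contribute to the loss.

```python
import torch
import torch.nn.functional as F

# Toy example: 6 tokens, the answer starting at position 3 (hypothetical values).
input_ids = torch.tensor([[101, 102, 103, 104, 105, 106]])
answer_start = 3

labels = input_ids.clone()
labels[:, :answer_start] = -100  # prompt tokens are ignored by the loss

# Standard causal-LM shift: predict token t+1 from tokens <= t.
vocab_size = 32000
logits = torch.randn(1, input_ids.size(1), vocab_size)  # stand-in for model output
loss = F.cross_entropy(logits[:, :-1].reshape(-1, vocab_size),
                       labels[:, 1:].reshape(-1),
                       ignore_index=-100)
print(loss)
```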
# Quick Start

This tutorial is intended to help users quickly deploy and develop with the Telechat model. It mainly covers:

## Getting the Resources

### Image Download

To help you get started quickly, we provide a ready-to-run environment image. Download: [image download](https://cloud.189.cn/t/EbAriaQfa2mm) (access code: 2uik)

### Model Weights Download

| Model version | Download link |
|---------| ----------------- |
| 7B-FP16 | [TeleChat-FP16](https://huggingface.co/Tele-AI/Telechat-7B) |
| 7B-int8 | [TeleChat-int8](https://huggingface.co/Tele-AI/Telechat-7B-int8) |
| 7B-int4 | [TeleChat-int4](https://huggingface.co/Tele-AI/Telechat-7B-int4) |
## Environment Setup

### Developing with the Image

**The tutorial uses version 1.2 as an example; adjust the version number for later releases.**

Download the image and decompress it to obtain the tar file.

![image tar package](../images/镜像tar包.png)

Load the image:

```shell
sudo docker load -i telechat-public_1.2.tar
```

![image import](../images/镜像导入过程.png)

Start the container. NVIDIA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 mounts the eight GPUs numbered 0-7; adjust this to your machine.

```shell
sudo docker run -itd --name telechat --runtime=nvidia --shm-size=256g -e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 telechat-public:1.2 bash
```

![container start](../images/容器启动.png)

Copy the code and model into the container:
- e.g. copy the local model folder telechat_opensource_7B_huggingface into /home of the telechat container
- e.g. copy the code folder Telechat into /home of the telechat container

```shell
sudo docker cp telechat_opensource_7B_huggingface telechat:/home/. && sudo docker cp TeleChat telechat:/home/.
```

Enter the container:

```shell
sudo docker exec -it telechat bash
```

![mount model and code](../images/挂载模型代码.png)
## Model Inference

Enter Telechat/inference_telechat:

```shell
python3 telechat_infer_demo.py
```

![direct inference result](../images/直接推理结果.png)

### Long-Context Extrapolation

By combining NTK-aware extrapolation with attention scaling, a model fine-tuned at 8K length can be extrapolated to 96K length at inference time. The table below shows the perplexity of TeleChat-7B on wikipedia data of various lengths; with NTK-aware extrapolation and attention scaling applied together, TeleChat still achieves low perplexity at a 96K inference length.
| Method | 2048 | 4096 | 8192 | 16384 | 32768 | 65536 | 98304 |
| ---------------------------------- | ------ | ------ | ------- | ------- | -------- | -------- | -------- |
| baseline | 4.8122 | 4.6562 | 39.3099 | 98.3102 | 155.2708 | 487.3398 | 447.6295 |
| NTK-aware (8k) | 4.8122 | 4.6562 | 5.1904 | 4.7155 | 8.6351 | 77.7478 | 79.9256 |
| NTK-aware+attention scaling (8k) | 4.8122 | 4.6562 | 5.1904 | 4.0353 | 4.1408 | 9.4080 | 7.9711 |
| NTK-aware (16k) | 7.6916 | 7.9900 | 7.9580 | 5.1217 | 4.7932 | 10.5444 | 10.3614 |
| NTK-aware+attention scaling (16k) | 7.6916 | 7.9900 | 7.9580 | 5.1217 | 4.7195 | 8.9751 | 7.6822 |
You can also fine-tune TeleChat at longer lengths for stronger extrapolation. After fine-tuning, simply **set the `training_seqlen` field in the model's `config.json` to the sequence length used during fine-tuning** before running inference. Rows 4 and 5 of the table show the perplexities after fine-tuning TeleChat-7B at 16K length; perplexity is lower at inference lengths of 64K and above.
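
A minimal sketch of that config edit (the checkpoint path and sequence length below are illustrative):

```python
import json

# Hypothetical checkpoint path; point this at your fine-tuned model.
config_path = "../../models/7B/config.json"

with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

# Set training_seqlen to the sequence length used during fine-tuning.
config["training_seqlen"] = 16384

with open(config_path, "w", encoding="utf-8") as f:
    json.dump(config, f, ensure_ascii=False, indent=2)
```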
## Model Fine-Tuning

Fine-tuning comes in two flavors: full-parameter fine-tuning and LoRA fine-tuning.

### LoRA Fine-Tuning

**Enter the `deepspeed-telechat/sft` directory** and run the following command to start DeepSpeed-based LoRA fine-tuning.

```shell
bash run_telechat_lora.sh
```

### Full-Parameter Fine-Tuning

**Enter the `deepspeed-telechat/sft` directory** and run the following command to start DeepSpeed-based full-parameter fine-tuning.

Single-node script:

```shell
bash run_telechat_single_node.sh
```

### Inference Test After Fine-Tuning

**Enter the `inference_telechat/` directory**, set PATH in telechat_infer_demo.py to the model path saved in the previous step, then run the following command to run inference:

```shell
python telechat_infer_demo.py
```

## Model Quantization

### GPTQ Quantization

Enter Telechat/quant:

```shell
python quant.py
```

![quantization result](../images/量化结果.png)

### Quantized Inference

Run inference:

```shell
python telechat_quantized_infer_demo.py
```

![quantized inference result](../images/量化推理结果.png)

## Serving

We provide two simple **single-concurrency** serving options, both with streaming support.

### API

Enter the service folder:

```shell
python telechat_service.py
```

![API](../images/api页面.png)

By default the telechat service starts on 0.0.0.0:8070; you can test it with test_json.py and test_stream.py.
To access the service from another machine, replace 0.0.0.0 with the server's IP.
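
A minimal request sketch, assuming the service accepts an HTTP POST with a JSON body (the route and field names below are placeholders, not taken from telechat_service.py; check test_json.py for the real schema):

```python
import requests

# Hypothetical endpoint and payload; consult test_json.py for the
# actual route and field names used by telechat_service.py.
url = "http://0.0.0.0:8070/telechat"
payload = {"question": "介绍下TeleChat"}

resp = requests.post(url, json=payload, timeout=60)
print(resp.json())
```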
### WEB

After deploying the API, run:

```shell
streamlit run webdemo.py
```

![WEB](../images/web页面.png)

By default the web demo serves on 0.0.0.0:8501.
To access it from another machine, replace 0.0.0.0 with the server's IP.
# Reproducing Benchmark Results

### CEVAL

First, download and unzip the CEVAL dataset:

```shell
wget https://huggingface.co/datasets/ceval/ceval-exam/resolve/main/ceval-exam.zip
unzip ceval-exam.zip
```

Then run the prediction script:

```shell
python score_CEVAL.py --path /path/to/ckpt --five_shot
```

This produces `submission.json`, which you can submit to the [CEVAL official website](https://cevalbenchmark.com/) for evaluation.

### MMLU

Dataset download: https://github.com/hendrycks/test?tab=readme-ov-file

Then run the prediction script:

```shell
python score_MMLU.py
```
import os
import re
import time
import torch
import json
import argparse
import pandas as pd
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
TASK2DESC = {
"high_school_physics": "高中物理",
"fire_engineer": "注册消防工程师",
"computer_network": "计算机网络",
"advanced_mathematics": "高等数学",
"logic": "逻辑学",
"middle_school_physics": "初中物理",
"clinical_medicine": "临床医学",
"probability_and_statistics": "概率统计",
"ideological_and_moral_cultivation": "思想道德修养与法律基础",
"operating_system": "操作系统",
"middle_school_mathematics": "初中数学",
"chinese_language_and_literature": "中国语言文学",
"electrical_engineer": "注册电气工程师",
"business_administration": "工商管理",
"high_school_geography": "高中地理",
"modern_chinese_history": "近代史纲要",
"legal_professional": "法律职业资格",
"middle_school_geography": "初中地理",
"middle_school_chemistry": "初中化学",
"high_school_biology": "高中生物",
"high_school_chemistry": "高中化学",
"physician": "医师资格",
"high_school_chinese": "高中语文",
"tax_accountant": "税务师",
"high_school_history": "高中历史",
"mao_zedong_thought": "毛泽东思想和中国特色社会主义理论概论",
"high_school_mathematics": "高中数学",
"professional_tour_guide": "导游资格",
"veterinary_medicine": "兽医学",
"environmental_impact_assessment_engineer": "环境影响评价工程师",
"basic_medicine": "基础医学",
"education_science": "教育学",
"urban_and_rural_planner": "注册城乡规划师",
"middle_school_biology": "初中生物",
"plant_protection": "植物保护",
"middle_school_history": "初中历史",
"high_school_politics": "高中政治",
"metrology_engineer": "注册计量师",
"art_studies": "艺术学",
"college_economics": "大学经济学",
"college_chemistry": "大学化学",
"law": "法学",
"sports_science": "体育学",
"civil_servant": "公务员",
"college_programming": "大学编程",
"middle_school_politics": "初中政治",
"teacher_qualification": "教师资格",
"computer_architecture": "计算机组成",
"college_physics": "大学物理",
"discrete_mathematics": "离散数学",
"marxism": "马克思主义基本原理",
"accountant": "注册会计师",
}
def build_example(question, A, B, C, D, answer="", with_answer: bool = True):
    choice = "\n".join([
        "A. " + A,
        "B. " + B,
        "C. " + C,
        "D. " + D,
    ])
    answer = answer.strip().upper() if with_answer else ""
    return f"{question}\n{choice}\n答案:{answer}"
def extract_answer_option(text):
    patterns = [
        r"答案是?\s?([ABCD])",
        r"答案是?\s?:([ABCD])",
        r"答案是?\s?:([ABCD])",
        r"答案应该?是\s?([ABCD])",
        r"答案应该?选\s?([ABCD])",
        r"答案为\s?([ABCD])",
        r"选择\s?([ABCD])",
        r"只有选?项?\s?([ABCD])\s?是?对",
        r"只有选?项?\s?([ABCD])\s?是?错",
        r"只有选?项?\s?([ABCD])\s?不?正确",
        r"只有选?项?\s?([ABCD])\s?错误",
        r"说法不?对选?项?的?是\s?([ABCD])",
        r"说法不?正确选?项?的?是\s?([ABCD])",
        r"说法错误选?项?的?是\s?([ABCD])",
        r"([ABCD])\s?是正确的",
        r"([ABCD])\s?是正确答案",
        r"选项\s?([ABCD])\s?正确",
        r"所以答\s?([ABCD])",
        r"1.\s?([ABCD])[.。$]?$",
        r"所以\s?([ABCD][.。$]?$)",
        r"所有\s?([ABCD][.。$]?$)",
        r"[\s,::,]([ABCD])[。,,\.]?$",
        r"[\s,,::][故即]([ABCD])[。\.]?$",
        r"[\s,,::]因此([ABCD])[。\.]?$",
        r"[是为。]\s?([ABCD])[。\.]?$",
        r"因此\s?([ABCD])[。\.]?$",
        r"显然\s?([ABCD])[。\.]?$",
        r"1.\s?(.*?)$",
        r"答案是\s?(\S+)(?:。|$)",
        r"答案应该是\s?(\S+)(?:。|$)",
        r"答案为\s?(\S+)(?:。|$)",
    ]
    regexes = [re.compile(pattern) for pattern in patterns]
    for regex in regexes:
        match = regex.search(text)
        if match:
            return match.group(1)
    # fall back to the first capital option letter anywhere in the text
    for i in text:
        if i in "ABCD":
            return i
    # default guess when nothing matches
    return "C"
def get_args():
    parser = argparse.ArgumentParser(
        'Evaluation',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    group = parser.add_argument_group('EVAL Task Parameters')
    group.add_argument('--five_shot', action="store_true")
    group.add_argument('--path', type=str, default=None)
    args = parser.parse_args()
    return args


args = get_args()
PATH = args.path
tokenizer = AutoTokenizer.from_pretrained(PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(PATH, trust_remote_code=True,
                                             device_map="auto",
                                             torch_dtype=torch.float16)
model.eval()
submit_dict = {}
filenames = os.listdir("test")
subject_list = [test_file.replace("_test.csv", "") for test_file in filenames]
# Five-shot examples (college_economics) shared across all subjects;
# hoisted out of the evaluation loop since the prompt never changes.
five_shot_prompt = '''<_user>以下是中国关于大学经济学考试的单项选择题,请从A、B、C、D四个选项中选出其中的正确答案。\n问题:考虑以下小型开放经济的数据:Y=1000,C=700,G=150,I=250-1000r*。如果世界利率为5%,那么小型开放经济的净出口为____。\nA. 50\nB. -50\nC. 150\nD. -150<_bot>答案: B<_end><_user>问题:已知一垄断企业成本函数为TC=5Q2+20Q+10,产品的需求函数为Q=140-P。利润最大化的产量为____。]\nA. 10\nB. 5\nC. 3\nD. 15<_bot>答案: A<_end><_user>问题:如果消费与实际利率负相关,则______。\nA. IS曲线更平坦\nB. IS曲线更陡峭\nC. LM曲线更平坦\nD. LM曲线更陡峭<_bot>答案: A<_end>问题:如果所有产品的生产都增加了10%,且所有价格都下降了10%,会发生以下哪一种情况?____\nA. 真实GDP增加10%,名义GDP减少10%\nB. 真实GDP增加10%,名义GDP不变\nC. 真实GDP不变,名义GDP增加10%\nD. 真实GDP不变,名义GDP减少10%<_bot>答案: B<_end><_user>问题:如果边际技术替代率MRTSLK小于劳动与资本的价格之比,为使成本最小,该厂商应该____。\nA. 同时增加资本和劳动\nB. 同时减少资本和劳动\nC. 减少资本,增加劳动\nD. 增加资本,减少劳动<_bot>答案: D<_end>'''

for index, subject_name in enumerate(subject_list):
    submit_dict[subject_name] = {}
    test_file_path = os.path.join('test', f'{subject_name}_test.csv')
    test_df = pd.read_csv(test_file_path)
    for idx in tqdm(range(len(test_df))):
        id = test_df["id"][idx]
        question = test_df["question"][idx]
        choice_A = test_df["A"][idx]
        choice_B = test_df["B"][idx]
        choice_C = test_df["C"][idx]
        choice_D = test_df["D"][idx]
        prompt = f"以下是中国关于{TASK2DESC[subject_name]}考试的单项选择题,请选出其中的正确答案。\n\n"
        prompt += build_example(question, choice_A, choice_B, choice_C,
                                choice_D, with_answer=False)
        if args.five_shot:
            prompt = five_shot_prompt + "<_user>" + prompt + "<_bot>"
        else:
            prompt = "<_user>" + prompt + "<_bot>"
        context_ids = tokenizer(prompt, return_tensors="pt")
        output = model.generate(context_ids["input_ids"].to(0),
                                max_new_tokens=2000, temperature=0.2,
                                top_p=0.95, repetition_penalty=1.0,
                                do_sample=False,
                                eos_token_id=[160133, 160130])
        output_str = tokenizer.decode(output[0].tolist()).split("<_bot>")[-1]
        answer_extracted = extract_answer_option(output_str)
        submit_dict[subject_name][str(id)] = answer_extracted

with open("submission.json", 'w', encoding='utf-8') as f:
    json.dump(submit_dict, f, ensure_ascii=False, indent=4)
import os
import re
import torch
import pandas as pd
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

os.environ["CUDA_VISIBLE_DEVICES"] = '0'
PATH = "../models/7B"  # checkpoint path
mmlu_dataset = "../mmlu/"  # dataset path

def get_few_shot_prompt(filename):
    filepath = os.path.join(mmlu_dataset, "dev", filename.replace("test", "dev"))
    df = pd.read_csv(filepath, header=None,
                     names=["input", "A", "B", "C", "D", "answer"])
    prompts = []
    _hint = f'There is a single choice question about {filename.replace("_test.csv", " ")}. Answer the question by replying A, B, C or D.'
    for i in range(len(df)):
        line = df.iloc[i]
        user_content = f"{_hint}\nQuestion: {line['input']}\nA. {line['A']}\nB. {line['B']}\nC. {line['C']}\nD. {line['D']}\nAnswer: "
        bot_content = f"{line[line['answer']]}\n"
        prompts.extend([{"role": "user", "content": user_content},
                        {"role": "bot", "content": bot_content}])
    return prompts[:10]


# build the test inputs
def get_input_data(test_file_path, filename):
    _hint = f'There is a single choice question about {filename.replace("_test.csv", " ")}. Answer the question by replying A, B, C or D.'
    df = pd.read_csv(os.path.join(test_file_path, filename), header=None,
                     names=["input", "A", "B", "C", "D", "answer"])
    data = []
    for i in range(len(df)):
        line = df.iloc[i]
        input = f"{_hint}\nQuestion: {line['input']}\nA. {line['A']}\nB. {line['B']}\nC. {line['C']}\nD. {line['D']}\nAnswer: "
        target = line["answer"]
        data.append({"input": input, "target": target})
    return data


# post-process: extract the option letter from the model's answer
def get_capital_answer(text):
    patterns = [
        r"the answer is ([A-E])",
        r"the answer is([A-E])",
        r"Answer: ([A-E])",
        r"Answer: \(([A-E])\)",
        r"Option \(([A-E])\)",
        r"Answer:([A-E])",
        r"Option ([A-E])",
        r"Opt ([A-E])"
    ]
    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group(1)
    match = re.findall("[A-D]", text)
    if match:
        return match[0]
    return ""

if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained(PATH)
    model = AutoModelForCausalLM.from_pretrained(PATH, trust_remote_code=True,
                                                 device_map="auto",
                                                 torch_dtype=torch.float16)
    generate_config = GenerationConfig.from_pretrained(PATH)
    generate_config.temperature = 0.1
    generate_config.top_k = 50
    generate_config.top_p = 0.95
    print(generate_config)
    model.eval()
    test_file_path = mmlu_dataset + "test"
    filenames = os.listdir(test_file_path)
    score_list = []
    for filename in filenames:
        score, total = 0, 0
        few_shot_prompt = get_few_shot_prompt(filename)
        input_list = get_input_data(test_file_path, filename)
        for line in tqdm(input_list):
            answer, history = model.chat(tokenizer=tokenizer,
                                         question=line["input"],
                                         history=few_shot_prompt,
                                         generation_config=generate_config,
                                         stream=False)
            answer = get_capital_answer(answer)
            if answer == line["target"]:
                score += 1
            total += 1
        score_list.append(score / total)
    final_score = sum(score_list) / len(score_list)
    print(final_score)
{"input": "提供一份优秀的演讲模板。"}
{"input": "描述一片自然景观,例如海滩或山脉。"}
{"input": "设计一份高质量的舞蹈表演节目,包括音乐、服装、舞蹈编排。"}
{"input": "如果一个石头从8米高的悬崖上落下,它将在多长时间内落地?(不考虑空气阻力)"}
{"input": "什么是量子力学?为什么它很重要?"}
{"input": "说明如何在Java中实现一个简单的数据库连接池。"}
{"input": "提供几条实用的时尚搭配建议,例如如何穿出不俗的商务装扮以及如何配饰一身休闲装。"}
{"input": "揭示航天器在太空中运行的基本原理和过程。"}
{"input": "想出一个关于人生的格言,这个格言应该启发他人,包含智慧和洞察。"}
{"input": "岩手县的公共交通如何发展?"}
{"input": "建议加强家庭关系的方法和技巧"}
{"input": "是不是所有的鱼都能呼吸空气?"}
{"input": "如果把鸡蛋煮到牛奶里,可以煮出牛奶蛋吗?"}
{"input": "生成一段对于如何训练宠物成为嗅探犬的详细说明性文本。"}
{"input": "解释一下Oscar Awards的历史。"}
{"input": "根据以下条件对商品进行分类:价格小于等于100元,则为A类;价格在101~500元之间,则为B类;价格大于500元,则为C类。\n商品列表:[{name: 'book', price: 50}, {name: 'bag', price: 200}, {name: 'TV', price: 800}]"}
{"input": "提供一份关于如何防火的文档。"}
{"input": "如何提高房屋的增值潜力?"}
{"input": "编写一篇描绘性专栏文章的作品。"}
{"input": "在知名论坛上发布一篇关于城市化的文章,阐述城市化对人口、环境和经济的影响。"}
{"input": "为什么有些人会感到孤独?该如何摆脱孤独?"}
{"input": "指点一个菜鸟驾驶员如何开车上高速公路。"}
{"input": "手机使用时附近有火药味会导致手机爆炸,这种说法正确吗?"}
{"input": "描述一幢建筑的外观和内部布局。"}
{"input": "描述一个理想的家具布置方案,以最大程度地利用空间。"}
{"input": "你是哪个团队开发的?"}
{"input": "你跟中国电信是什么关系?"}
{"input": "介绍下TeleChat"}
{"input": "介绍下自己"}
{"input": "你好"}
{"input": "谁给你起的名字"}
{"input": "电信主卡和副卡的区别在哪里?"}
{"input": "说一下电信5G使用场景有几种场景?"}
{"input": "那你会什么"}
{"input": "英语你会吗"}
{"input": "撰写一篇以《提高自主创新》为标题的申论,字数在700左右"}
{"input": "请以《发展》为题,写一篇1000字的申论文章"}
{"input": "给我写一个快速排序的代码,用java"}
{"input": "请编写一个Java程序,在屏幕上输出从1到10的所有整数。"}
{"input": "请编写一段代码来检查一个字符串是否为回文字符串。\n如:'racecar'"}
{"input": "小明买了10支笔和6个橡皮,如果每支笔5元,\n\n每个橡皮2元,他一共花了多少元?"}
{"input": "人们因技术发展得以更好地掌控时间,但也有人因此成了时间的仆人。这句话引发了你怎样的联想与思考?请写一篇文章。\n要求:选准角度,确定立意,明确文体,自拟标题;不要套作,不得抄袭;不得泄露个人信息;不少于800字。"}
{"input": "评价一下姚明"}
{"input": "我是一名老师,帮忙生成一个主题为“好好学习,积极向上”的朋友圈短文案,要求内容幽默风趣不死板。"}
{"input": "设计一个时尚品牌的标志,符合品牌价值观和目标受众。"}
{"input": "说明如何创造一个双人浪漫的晚餐。"}
{"input": "编写一份公司年度财务报表"}
{"input": "解释什么是汽车发动机机油,机油的作用和更换周期。"}
{"input": "为减轻颈椎病症状,提供五个有效养生方法,并解释其机理。"}
{"input": "针对失眠人群提出五个改善方法。"}