init commit

032b90a1 · luopl · 032b90a1 · 032b90a1 · 032b90a1 · 032b90a1
Commit 032b90a1 authored Sep 12, 2024 by luopl
13 changed files
--- a/LLaMA-Factory/tests/model/test_freeze.py
+++ b/LLaMA-Factory/tests/model/test_freeze.py
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import torch
+from llamafactory.train.test_utils import load_infer_model, load_train_model
+# Model identifier used by every test in this file; the TINY_LLAMA environment variable overrides the default.
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+# Keyword arguments expanded into load_train_model() by the freeze training tests below.
+TRAIN_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "stage": "sft",
+    "do_train": True,
+    "finetuning_type": "freeze",
+    "dataset": "llamafactory/tiny-supervised-dataset",
+    "dataset_dir": "ONLINE",
+    "template": "llama3",
+    "cutoff_len": 1024,
+    "overwrite_cache": True,
+    "output_dir": "dummy_dir",
+    "overwrite_output_dir": True,
+    "fp16": True,
+}
+# Keyword arguments expanded into load_infer_model() by test_freeze_inference.
+INFER_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "finetuning_type": "freeze",
+    "template": "llama3",
+    "infer_dtype": "float16",
+}
+def test_freeze_train_all_modules():
+    model = load_train_model(freeze_trainable_layers=1, **TRAIN_ARGS)
+    for name, param in model.named_parameters():
+        if name.startswith("model.layers.1."):
+            assert param.requires_grad is True
+            assert param.dtype == torch.float32
+        else:
+            assert param.requires_grad is False
+            assert param.dtype == torch.float16
+def test_freeze_train_extra_modules():
+    model = load_train_model(freeze_trainable_layers=1, freeze_extra_modules="embed_tokens,lm_head", **TRAIN_ARGS)
+    for name, param in model.named_parameters():
+        if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]):
+            assert param.requires_grad is True
+            assert param.dtype == torch.float32
+        else:
+            assert param.requires_grad is False
+            assert param.dtype == torch.float16
+def test_freeze_inference():
+    model = load_infer_model(**INFER_ARGS)
+    for param in model.parameters():
+        assert param.requires_grad is False
+        assert param.dtype == torch.float16
--- a/LLaMA-Factory/tests/model/test_full.py
+++ b/LLaMA-Factory/tests/model/test_full.py
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import torch
+from llamafactory.train.test_utils import load_infer_model, load_train_model
+# Model identifier used by every test in this file; the TINY_LLAMA environment variable overrides the default.
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+# Keyword arguments expanded into load_train_model() by test_full_train.
+TRAIN_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "stage": "sft",
+    "do_train": True,
+    "finetuning_type": "full",
+    "dataset": "llamafactory/tiny-supervised-dataset",
+    "dataset_dir": "ONLINE",
+    "template": "llama3",
+    "cutoff_len": 1024,
+    "overwrite_cache": True,
+    "output_dir": "dummy_dir",
+    "overwrite_output_dir": True,
+    "fp16": True,
+}
+# Keyword arguments expanded into load_infer_model() by test_full_inference.
+INFER_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "finetuning_type": "full",
+    "template": "llama3",
+    "infer_dtype": "float16",
+}
+def test_full_train():
+    model = load_train_model(**TRAIN_ARGS)
+    for param in model.parameters():
+        assert param.requires_grad is True
+        assert param.dtype == torch.float32
+def test_full_inference():
+    model = load_infer_model(**INFER_ARGS)
+    for param in model.parameters():
+        assert param.requires_grad is False
+        assert param.dtype == torch.float16
--- a/LLaMA-Factory/tests/model/test_lora.py
+++ b/LLaMA-Factory/tests/model/test_lora.py
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import pytest
+import torch
+from llamafactory.train.test_utils import (
+    check_lora_model,
+    compare_model,
+    load_infer_model,
+    load_reference_model,
+    load_train_model,
+    patch_valuehead_model,
+)
+# Base model identifier; the TINY_LLAMA environment variable overrides the default.
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+# LoRA adapter passed as adapter_name_or_path and to load_reference_model(); overridable via TINY_LLAMA_ADAPTER.
+TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")
+# Reference model loaded by test_lora_train_valuehead; overridable via TINY_LLAMA_VALUEHEAD.
+TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead")
+# Keyword arguments expanded into load_train_model() by the LoRA training tests below.
+TRAIN_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "stage": "sft",
+    "do_train": True,
+    "finetuning_type": "lora",
+    "dataset": "llamafactory/tiny-supervised-dataset",
+    "dataset_dir": "ONLINE",
+    "template": "llama3",
+    "cutoff_len": 1024,
+    "overwrite_cache": True,
+    "output_dir": "dummy_dir",
+    "overwrite_output_dir": True,
+    "fp16": True,
+}
+# Keyword arguments expanded into load_infer_model() by test_lora_inference.
+INFER_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "adapter_name_or_path": TINY_LLAMA_ADAPTER,
+    "finetuning_type": "lora",
+    "template": "llama3",
+    "infer_dtype": "float16",
+}
+@pytest.fixture
+def fix_valuehead_cpu_loading():
+    # Calls patch_valuehead_model() before the requesting test runs.
+    # NOTE(review): presumably this patches value-head model loading so it works on CPU — confirm in test_utils.
+    patch_valuehead_model()
+def test_lora_train_qv_modules():
+    model = load_train_model(lora_target="q_proj,v_proj", **TRAIN_ARGS)
+    linear_modules, _ = check_lora_model(model)
+    assert linear_modules == {"q_proj", "v_proj"}
+def test_lora_train_all_modules():
+    model = load_train_model(lora_target="all", **TRAIN_ARGS)
+    linear_modules, _ = check_lora_model(model)
+    assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"}
+def test_lora_train_extra_modules():
+    model = load_train_model(additional_target="embed_tokens,lm_head", **TRAIN_ARGS)
+    _, extra_modules = check_lora_model(model)
+    assert extra_modules == {"embed_tokens", "lm_head"}
+def test_lora_train_old_adapters():
+    model = load_train_model(adapter_name_or_path=TINY_LLAMA_ADAPTER, create_new_adapter=False, **TRAIN_ARGS)
+    ref_model = load_reference_model(TINY_LLAMA, TINY_LLAMA_ADAPTER, use_lora=True, is_trainable=True)
+    compare_model(model, ref_model)
+def test_lora_train_new_adapters():
+    model = load_train_model(adapter_name_or_path=TINY_LLAMA_ADAPTER, create_new_adapter=True, **TRAIN_ARGS)
+    ref_model = load_reference_model(TINY_LLAMA, TINY_LLAMA_ADAPTER, use_lora=True, is_trainable=True)
+    compare_model(
+        model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"]
+    )
+@pytest.mark.usefixtures("fix_valuehead_cpu_loading")
+def test_lora_train_valuehead():
+    model = load_train_model(add_valuehead=True, **TRAIN_ARGS)
+    ref_model = load_reference_model(TINY_LLAMA_VALUEHEAD, is_trainable=True, add_valuehead=True)
+    state_dict = model.state_dict()
+    ref_state_dict = ref_model.state_dict()
+    assert torch.allclose(state_dict["v_head.summary.weight"], ref_state_dict["v_head.summary.weight"])
+    assert torch.allclose(state_dict["v_head.summary.bias"], ref_state_dict["v_head.summary.bias"])
+def test_lora_inference():
+    model = load_infer_model(**INFER_ARGS)
+    ref_model = load_reference_model(TINY_LLAMA, TINY_LLAMA_ADAPTER, use_lora=True).merge_and_unload()
+    compare_model(model, ref_model)
--- a/LLaMA-Factory/tests/model/test_pissa.py
+++ b/LLaMA-Factory/tests/model/test_pissa.py
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from llamafactory.train.test_utils import compare_model, load_infer_model, load_reference_model, load_train_model
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA_PISSA = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-pissa")
+TRAIN_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "stage": "sft",
+    "do_train": True,
+    "finetuning_type": "lora",
+    "pissa_init": True,
+    "pissa_iter": -1,
+    "dataset": "llamafactory/tiny-supervised-dataset",
+    "dataset_dir": "ONLINE",
+    "template": "llama3",
+    "cutoff_len": 1024,
+    "overwrite_cache": True,
+    "output_dir": "dummy_dir",
+    "overwrite_output_dir": True,
+    "fp16": True,
+}
+INFER_ARGS = {
+    "model_name_or_path": TINY_LLAMA_PISSA,
+    "adapter_name_or_path": TINY_LLAMA_PISSA,
+    "adapter_folder": "pissa_init",
+    "finetuning_type": "lora",
+    "template": "llama3",
+    "infer_dtype": "float16",
+}
+def test_pissa_train():
+    model = load_train_model(**TRAIN_ARGS)
+    ref_model = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=True)
+    compare_model(model, ref_model)
+def test_pissa_inference():
+    model = load_infer_model(**INFER_ARGS)
+    ref_model = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=False)
+    ref_model = ref_model.merge_and_unload()
+    compare_model(model, ref_model)
--- a/README.md
+++ b/README.md
+# Qwen2
+## 论文
+Qwen2 Technical Report
+https://arxiv.org/abs/2407.10671
+## 模型结构
+Qwen2使用字节级字节对编码，值得注意的是，这种分词器具有很高的编码效率，其压缩率优于其他选项，这有助于增强Qwen2的多语言能力。 Qwen2 超越了大多数之前的开放权重模型，包括其前身 Qwen1.5，并且在语言理解、生成、多语言能力、编码、数学和推理等各种基准测试中，与专有模型相比表现出了极具竞争力的性能。
+<div align=center>
+    <img src="./assets/qwen2.jpg"/>
+</div>
+## 算法原理
+Qwen2仍然是一个典型decoder-only的transformers大模型结构，主要包括文本输入层、embedding层、decoder层、输出层及损失函数
+<div align=center>
+    <img src="./assets/qwen2.png"/>
+</div>
+## 环境配置
+### Docker（方法一）
+推荐使用docker方式运行， 此处提供[光源](https://www.sourcefind.cn/#/service-details)拉取docker镜像的地址与使用步骤
+```
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.1-py3.10
+docker run -it --shm-size=1024G -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal:/opt/hyhal --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name qwen2_72B_pytorch  <your IMAGE ID> bash # 请将<your IMAGE ID>替换为以上拉取的docker镜像的ID，本镜像为：a4dd5be0ca23
+pip install https://cancon.hpccube.com:65024/directlink/4/vllm/DAS1.1.1/vllm-0.5.0+das.opt1.3e2c63a.dtk2404.torch2.1.0-cp310-cp310-linux_x86_64.whl
+cd /path/your_code_data/
+cd LLaMA-Factory
+pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/  --trusted-host mirrors.aliyun.com
+pip install -e . -i https://mirrors.aliyun.com/pypi/simple/  --trusted-host mirrors.aliyun.com
+```
+Tips:以上dtk驱动、python、torch、vllm等DCU相关工具版本需要严格一一对应。
+### Dockerfile（方法二）
+此处提供dockerfile的使用方法
+```
+docker build -t qwen2:latest .
+docker run -it --shm-size=1024G -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal:/opt/hyhal --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name qwen2_pytorch  qwen2 bash 
+pip install https://cancon.hpccube.com:65024/directlink/4/vllm/DAS1.1.1/vllm-0.5.0+das.opt1.3e2c63a.dtk2404.torch2.1.0-cp310-cp310-linux_x86_64.whl
+cd /path/your_code_data/
+cd LLaMA-Factory
+pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/  --trusted-host mirrors.aliyun.com
+pip install -e . -i https://mirrors.aliyun.com/pypi/simple/  --trusted-host mirrors.aliyun.com
+```
+### Anaconda（方法三）
+此处提供本地配置、编译的详细步骤，例如：
+关于本项目DCU显卡所需的特殊深度学习库可从[光合](https://developer.hpccube.com/tool/)开发者社区下载安装。
+```
+DTK驱动:dtk24.04
+python:3.10
+torch:2.1.0
+flash-attn:2.0.4
+vllm:0.5.0
+xformers:0.0.25
+triton:2.1.0
+deepspeed:0.12.3
+apex:1.1.0
+```
+`Tips：以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应`
+其它非深度学习库参照requirements.txt安装：
+```
+cd /path/your_code_data/
+cd LLaMA-Factory
+pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/  --trusted-host mirrors.aliyun.com
+pip install -e . -i https://mirrors.aliyun.com/pypi/simple/  --trusted-host mirrors.aliyun.com
+```
+## 数据集
+使用alpaca_gpt4_zh数据集，已经包含在data目录中，具体文件为alpaca_gpt4_data_zh.json
+训练数据目录结构如下，用于正常训练的完整数据集请按此目录结构进行制备：
+```
+ ── data
+    ├── alpaca_zh_demo.json
+    ├── alpaca_en_demo.json
+    ├── identity.json
+    └── ...
+```
+## 训练
+使用LLaMA-Factory框架微调
+### 模型下载
+[qwen2-7B模型下载SCNet链接](http://113.200.138.88:18080/aimodels/Qwen2-7B)
+[qwen2-7B-Instruct模型下载SCNet链接](http://113.200.138.88:18080/aimodels/Qwen2-7B-Instruct)
+[qwen2-72B模型下载SCNet链接](http://113.200.138.88:18080/aimodels/Qwen2-72B)
+[qwen2-72B-Instruct模型下载SCNet链接](http://113.200.138.88:18080/aimodels/Qwen2-72B-Instruct)
+### 单机单卡（LoRA-finetune）
+```
+#注意：根据自己的模型切换.yaml文件中的模型位置并调整其他参数
+cd /path/your_code_data/
+cd LLaMA-Factory
+HIP_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_lora/qwen2_lora_sft_ds3.yaml
+```
+### 单机多卡（LoRA-finetune）
+```
+HIP_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/train_lora/qwen2_lora_sft_ds3.yaml
+```
+## 推理
+使用vllm框架推理
+### 单机单卡
+```
+#注意：根据自己的模型切换文件中的模型位置并调整其他参数
+cd /path/your_code_data/
+python ./inference_vllm/Qwen2-7B_inference.py
+```
+### 单机多卡
+```
+python ./inference_vllm/Qwen2_72B_inference.py
+```
+其中，prompts为提示词，model为模型路径，tensor_parallel_size=4为使用卡数。
+## result
+使用的加速卡:4张 K100_AI  模型：qwen2-72B-Instruct
+<div align=left>
+    <img src="./assets/result.png"/>
+</div>
+### 精度
+模型:qwen2-72B-Instruct
+数据: identity,alpaca_zh_demo,alpaca_en_demo
+训练模式:LoRA finetune；zero3训练
+硬件：4卡，k100 AI
+在DCU上训练的收敛情况：
+<div align=left>
+    <img src="./assets/training_loss.png"/>
+</div>
+在DCU上训练时的验证收敛情况（250个steps间隔验证一次）：
+<div align=left>
+    <img src="./assets/training_eval_loss.png"/>
+</div>
+## 应用场景
+### 算法类别
+`对话问答`
+### 热点应用行业
+`科研,教育,政府,金融`
+## 源码仓库及问题反馈
+- http://developer.hpccube.com/codes/modelzoo/qwen1.5-pytorch.git
+## 参考资料
+- https://github.com/hiyouga/LLaMA-Factory
+- https://github.com/QwenLM/Qwen2
--- a/assets/qwen2.jpg
+++ b/assets/qwen2.jpg
--- a/assets/qwen2.png
+++ b/assets/qwen2.png
--- a/assets/result.png
+++ b/assets/result.png
--- a/assets/training_eval_loss.png
+++ b/assets/training_eval_loss.png
--- a/assets/training_loss.png
+++ b/assets/training_loss.png
--- a/inference_vllm/Qwen2-7B_inference.py
+++ b/inference_vllm/Qwen2-7B_inference.py
+from vllm import LLM, SamplingParams
+# Example script: generates a completion for each prompt in a fixed list with vLLM and prints the results.
+# Sample prompts.
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+# Create a sampling params object.
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+# Create an LLM.
+# NOTE(review): this file is named Qwen2-7B_inference.py but the path below points at a
+# Qwen2-0.5B-Instruct directory — confirm the intended model location before running.
+llm = LLM(model="/data/model/Qwen2-0.5B-Instruct/",trust_remote_code=True, dtype="float16", enforce_eager=True)
+# Generate texts from the prompts. The output is a list of RequestOutput objects
+# that contain the prompt, generated text, and other information.
+outputs = llm.generate(prompts, sampling_params)
+# Print the outputs (only the first generated candidate of each request).
+for output in outputs:
+    prompt = output.prompt
+    generated_text = output.outputs[0].text
+    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
--- a/inference_vllm/Qwen2_72B_inference.py
+++ b/inference_vllm/Qwen2_72B_inference.py
+import torch
+from vllm import LLM, SamplingParams
+def main():
+    # Sample prompts.
+    prompts = [
+        "The capital of France is",
+    ]
+    # Create a sampling params object.
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+    # Create an LLM object with model path and configuration.
+    llm = LLM(model="/data/model/Qwen2-72B-Instruct/",
+              tensor_parallel_size=4,
+              trust_remote_code=True,
+              dtype="float16",
+              enforce_eager=True)
+    # Generate texts from the prompts.
+    outputs = llm.generate(prompts, sampling_params)
+    # Print the outputs.
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+# Run the example only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    main()
--- a/model.properties
+++ b/model.properties
+# 模型唯一标识
+modelCode=998
+# 模型名称
+modelName=qwen2_pytorch
+# 模型描述
+modelDescription=Qwen2 超越了大多数之前的开放权重模型,包括其前身 Qwen1.5,并且在语言理解、生成、多语言能力、编码、数学和推理等各种基准测试中,与专有模型相比表现出了极具竞争力的性能。
+# 应用场景
+appScenario=推理,训练,对话问答,科研,教育,政府,金融
+# 框架类型
+frameType=Pytorch