Commit 1fb16033 authored by zhaoying1's avatar zhaoying1
Browse files

update

parent 7f8094a3
......@@ -3,5 +3,4 @@ COPY requirements.txt requirements.txt
RUN source /opt/dtk-23.04/env.sh
RUN cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone
ENV LANG C.UTF-8
RUN pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
RUN pip install accelerate --no-dependencies -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
\ No newline at end of file
RUN pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
\ No newline at end of file
File mode changed from 100644 to 100755
......@@ -30,13 +30,11 @@ Baichuan整体模型基于标准的Transformer结构,采用了和LLaMA一样
### Docker(方式一)
推荐使用docker方式运行,提供拉取的docker镜像:
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py37-latest
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py38-latest
```
安装docker中没有的依赖:
```
pip install transformers==4.28.0 -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install accelerate --no-dependencies -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install datasets peft tokenizers sentencepiece numpy -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
```
......@@ -50,7 +48,7 @@ docker exec -it baichuan2 /bin/bash
### Conda(方式三)
1. 创建conda虚拟环境:
```
conda create -n chatglm python=3.7
conda create -n chatglm python=3.8
```
2. 关于本项目DCU显卡所需的工具包、深度学习库等均可从[光合](https://developer.hpccube.com/tool/)开发者社区下载安装。
......@@ -62,8 +60,25 @@ conda create -n chatglm python=3.7
3. 其它依赖库参照requirements.txt安装:
```
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install accelerate --no-dependencies -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
```
### 注意
```
#到虚拟环境下对应的python/site-packages注释掉一些版本判断
site-packages/accelerate/accelerator.py 文件
287 #if not is_deepspeed_available():
288 # raise ImportError("DeepSpeed is not installed => run `pip install deepspeed` or build it from source.")
289 #if compare_versions("deepspeed", "<", "0.9.3"):
290 # raise ImportError("DeepSpeed version must be >= 0.9.3. Please update DeepSpeed.")
site-packages/transformers/utils/versions.py 文件
43 #if not ops[op](version.parse(got_ver), version.parse(want_ver)):
44 # raise ImportError(
45 # f"{requirement} is required for a normal functioning of this module, but found {pkg}=={got_ver}.{hint}"
46 # )
```
## 数据集
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
{
"train_micro_batch_size_per_gpu": "auto",
"zero_allow_untested_optimizer": true,
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"initial_scale_power": 16,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"zero_force_ds_cpu_optimizer": false,
"zero_optimization": {
"stage": 2,
"stage3_gather_16bit_weights_on_model_save": true,
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"overlap_comm": false,
"reduce_scatter": true,
"reduce_bucket_size": 5e8,
"contiguous_gradients" : true
}
}
File mode changed from 100644 to 100755
hostfile=""
HIP_VISIBLE_DEVICES=0,1,2,3 deepspeed --hostfile=$hostfile fine-tune.py \
HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 deepspeed --hostfile=$hostfile fine-tune.py \
--report_to "none" \
--data_path "data/belle_chat_ramdon_10k.json" \
--model_name_or_path "../baichuan2-7b-base" \
--model_name_or_path "../../baichuan2-13b-chat-hf" \
--output_dir "output" \
--model_max_length 512 \
--num_train_epochs 4 \
--per_device_train_batch_size 2 \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 1 \
--save_strategy epoch \
--learning_rate 2e-5 \
......
hostfile=""
HIP_VISIBLE_DEVICES=0,1,2,3 deepspeed --hostfile=$hostfile fine-tune.py \
HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 deepspeed --hostfile=$hostfile fine-tune.py \
--report_to "none" \
--data_path "data/belle_chat_ramdon_10k.json" \
--model_name_or_path "../baichuan2-7b-base" \
--data_path "data/test.json" \
--model_name_or_path "../../baichuan2-13b-chat-hf" \
--output_dir "output" \
--model_max_length 512 \
--model_max_length 64 \
--num_train_epochs 4 \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 1 \
......@@ -19,6 +19,6 @@ HIP_VISIBLE_DEVICES=0,1,2,3 deepspeed --hostfile=$hostfile fine-tune.py \
--warmup_ratio 0.0 \
--logging_steps 1 \
--gradient_checkpointing True \
--deepspeed ds_config_zero2.json \
--deepspeed ds_config.json \
--fp16 \
--use_lora True
File mode changed from 100644 to 100755
{
"train_micro_batch_size_per_gpu": "auto",
"zero_allow_untested_optimizer": true,
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"initial_scale_power": 16,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"zero_force_ds_cpu_optimizer": false,
"zero_optimization": {
"stage": 2,
"stage3_gather_16bit_weights_on_model_save": true,
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"overlap_comm": false,
"reduce_scatter": true,
"reduce_bucket_size": 5e8,
"contiguous_gradients" : true
}
}
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -22,7 +22,7 @@ echo "WORLD_SIZE*************$WORLD_SIZE"
APP="python3 ../fine-tune.py \
--deepspeed ../ds_config_zero2.json \
--deepspeed ds_config.json \
--report_to "none" \
--data_path "../data/belle_chat_ramdon_10k.json" \
--model_name_or_path "../../baichuan2-7b-base" \
......
transformers==4.31.0
datasets>=2.12.0
accelerate>=0.21.0
peft==0.4.0
colorama
numpy
transformers==4.28.0
sentencepiece
tokenizers
accelerate
streamlit
transformers_stream_generator
f14r1n19
f14r2n00
f14r2n01
f14r2n02
f14r2n03
f14r2n04
f14r2n05
f14r2n06
f14r2n07
f14r2n08
f14r2n09
f14r2n10
f14r2n11
f14r2n12
f14r2n13
f14r2n14
f14r2n15
f14r2n16
f14r2n17
f14r2n18
f14r2n19
f14r3n00
f14r3n01
f14r3n02
f14r1n19 slots=4
f14r2n00 slots=4
f14r2n01 slots=4
f14r2n02 slots=4
f14r2n03 slots=4
f14r2n04 slots=4
f14r2n05 slots=4
f14r2n06 slots=4
f14r2n07 slots=4
f14r2n08 slots=4
f14r2n09 slots=4
f14r2n10 slots=4
f14r2n11 slots=4
f14r2n12 slots=4
f14r2n13 slots=4
f14r2n14 slots=4
f14r2n15 slots=4
f14r2n16 slots=4
f14r2n17 slots=4
f14r2n18 slots=4
f14r2n19 slots=4
f14r3n00 slots=4
f14r3n01 slots=4
f14r3n02 slots=4
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment