Commit 1fb16033 authored by zhaoying1's avatar zhaoying1
Browse files

update

parent 7f8094a3
......@@ -3,5 +3,4 @@ COPY requirements.txt requirements.txt
RUN source /opt/dtk-23.04/env.sh
RUN cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone
ENV LANG C.UTF-8
RUN pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
RUN pip install accelerate --no-dependencies -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
\ No newline at end of file
RUN pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
\ No newline at end of file
File mode changed from 100644 to 100755
......@@ -30,13 +30,11 @@ Baichuan整体模型基于标准的Transformer结构,采用了和LLaMA一样
### Docker(方式一)
推荐使用docker方式运行,提供拉取的docker镜像:
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py37-latest
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py38-latest
```
安装docker中没有的依赖:
```
pip install transformers==4.28.0 -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install accelerate --no-dependencies -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install datasets peft tokenizers sentencepiece numpy -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
```
......@@ -50,7 +48,7 @@ docker exec -it baichuan2 /bin/bash
### Conda(方式三)
1. 创建conda虚拟环境:
```
conda create -n chatglm python=3.7
conda create -n chatglm python=3.8
```
2. 关于本项目DCU显卡所需的工具包、深度学习库等均可从[光合](https://developer.hpccube.com/tool/)开发者社区下载安装。
......@@ -62,8 +60,25 @@ conda create -n chatglm python=3.7
3. 其它依赖库参照requirements.txt安装:
```
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install accelerate --no-dependencies -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
```
### 注意
```
#到虚拟环境下对应的python/site-packages注释掉一些版本判断
site-packages/accelerate/accelerator.py 文件
287 #if not is_deepspeed_available():
288 # raise ImportError("DeepSpeed is not installed => run `pip install deepspeed` or build it from source.")
289 #if compare_versions("deepspeed", "<", "0.9.3"):
290 # raise ImportError("DeepSpeed version must be >= 0.9.3. Please update DeepSpeed.")
site-packages/transformers/utils/versions.py 文件
43 #if not ops[op](version.parse(got_ver), version.parse(want_ver)):
44 # raise ImportError(
45 # f"{requirement} is required for a normal functioning of this module, but found {pkg}=={got_ver}.{hint}"
46 # )
```
## 数据集
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
{
"train_micro_batch_size_per_gpu": "auto",
"zero_allow_untested_optimizer": true,
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"initial_scale_power": 16,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"zero_force_ds_cpu_optimizer": false,
"zero_optimization": {
"stage": 2,
"stage3_gather_16bit_weights_on_model_save": true,
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"overlap_comm": false,
"reduce_scatter": true,
"reduce_bucket_size": 5e8,
"contiguous_gradients" : true
}
}
File mode changed from 100644 to 100755
hostfile=""
HIP_VISIBLE_DEVICES=0,1,2,3 deepspeed --hostfile=$hostfile fine-tune.py \
HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 deepspeed --hostfile=$hostfile fine-tune.py \
--report_to "none" \
--data_path "data/belle_chat_ramdon_10k.json" \
--model_name_or_path "../baichuan2-7b-base" \
--model_name_or_path "../../baichuan2-13b-chat-hf" \
--output_dir "output" \
--model_max_length 512 \
--num_train_epochs 4 \
--per_device_train_batch_size 2 \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 1 \
--save_strategy epoch \
--learning_rate 2e-5 \
......
hostfile=""
HIP_VISIBLE_DEVICES=0,1,2,3 deepspeed --hostfile=$hostfile fine-tune.py \
HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 deepspeed --hostfile=$hostfile fine-tune.py \
--report_to "none" \
--data_path "data/belle_chat_ramdon_10k.json" \
--model_name_or_path "../baichuan2-7b-base" \
--data_path "data/test.json" \
--model_name_or_path "../../baichuan2-13b-chat-hf" \
--output_dir "output" \
--model_max_length 512 \
--model_max_length 64 \
--num_train_epochs 4 \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 1 \
......@@ -19,6 +19,6 @@ HIP_VISIBLE_DEVICES=0,1,2,3 deepspeed --hostfile=$hostfile fine-tune.py \
--warmup_ratio 0.0 \
--logging_steps 1 \
--gradient_checkpointing True \
--deepspeed ds_config_zero2.json \
--deepspeed ds_config.json \
--fp16 \
--use_lora True
File mode changed from 100644 to 100755
{
"train_micro_batch_size_per_gpu": "auto",
"zero_allow_untested_optimizer": true,
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"initial_scale_power": 16,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"zero_force_ds_cpu_optimizer": false,
"zero_optimization": {
"stage": 2,
"stage3_gather_16bit_weights_on_model_save": true,
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"overlap_comm": false,
"reduce_scatter": true,
"reduce_bucket_size": 5e8,
"contiguous_gradients" : true
}
}
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -22,7 +22,7 @@ echo "WORLD_SIZE*************$WORLD_SIZE"
APP="python3 ../fine-tune.py \
--deepspeed ../ds_config_zero2.json \
--deepspeed ds_config.json \
--report_to "none" \
--data_path "../data/belle_chat_ramdon_10k.json" \
--model_name_or_path "../../baichuan2-7b-base" \
......
transformers==4.31.0
datasets>=2.12.0
accelerate>=0.21.0
peft==0.4.0
colorama
numpy
transformers==4.28.0
sentencepiece
tokenizers
accelerate
streamlit
transformers_stream_generator
f14r1n19
f14r2n00
f14r2n01
f14r2n02
f14r2n03
f14r2n04
f14r2n05
f14r2n06
f14r2n07
f14r2n08
f14r2n09
f14r2n10
f14r2n11
f14r2n12
f14r2n13
f14r2n14
f14r2n15
f14r2n16
f14r2n17
f14r2n18
f14r2n19
f14r3n00
f14r3n01
f14r3n02
f14r1n19 slots=4
f14r2n00 slots=4
f14r2n01 slots=4
f14r2n02 slots=4
f14r2n03 slots=4
f14r2n04 slots=4
f14r2n05 slots=4
f14r2n06 slots=4
f14r2n07 slots=4
f14r2n08 slots=4
f14r2n09 slots=4
f14r2n10 slots=4
f14r2n11 slots=4
f14r2n12 slots=4
f14r2n13 slots=4
f14r2n14 slots=4
f14r2n15 slots=4
f14r2n16 slots=4
f14r2n17 slots=4
f14r2n18 slots=4
f14r2n19 slots=4
f14r3n00 slots=4
f14r3n01 slots=4
f14r3n02 slots=4
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment