Commit 53d602e7 authored by Rayyyyy

add alpaca data

parent 917e35e3
@@ -17,6 +17,7 @@ Llama-3 adopts a fairly standard decoder-only transformer architecture. Compared with Ll
## Environment Setup
Adjust the `-v` mount paths, `docker_name`, and `imageID` below to match your environment.
**Note**: the bitsandbytes library is not fully functional here; 4-bit quantization is not supported yet.
### Docker (Method 1)
@@ -25,9 +26,13 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk2
docker run -it -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
cd /your_code_path/llama3_pytorch
pip install -e .
pip install deepspeed-0.12.3+git299681e.abi0.dtk2310.torch2.1.0a0-cp38-cp38-manylinux2014_x86_64.whl
pip install bitsandbytes-0.43.0-py3-none-any.whl
pip install -U xtuner # 0.1.18
pip install mmengine==0.10.3
```
### Dockerfile (Method 2)
@@ -38,9 +43,13 @@ docker build --no-cache -t llama3:latest .
docker run -it -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
cd /your_code_path/llama3_pytorch
pip install -e .
pip install deepspeed-0.12.3+git299681e.abi0.dtk2310.torch2.1.0a0-cp38-cp38-manylinux2014_x86_64.whl
pip install bitsandbytes-0.43.0-py3-none-any.whl
pip install -U xtuner # 0.1.18
pip install mmengine==0.10.3
```
### Anaconda (Method 3)
@@ -49,26 +58,44 @@ pip install bitsandbytes-0.43.0-py3-none-any.whl
DTK driver: dtk23.10.1
python: python3.8
torch: 2.1.0
xtuner: 0.1.18
```
`Tips: the DTK driver, python, torch, and other DCU-related tool versions above must match exactly, one-to-one`
Install the remaining non-deep-learning libraries as follows:
```bash
pip install -e .
pip install deepspeed-0.12.3+git299681e.abi0.dtk2310.torch2.1.0a0-cp38-cp38-manylinux2014_x86_64.whl
pip install bitsandbytes-0.43.0-py3-none-any.whl
pip install -U xtuner # 0.1.18
pip install mmengine==0.10.3
```
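Regardless of which installation method you use, a quick import check helps confirm the environment is consistent. This is a minimal sketch; the exact version strings depend on the wheels installed above:
```python
# Sanity check: import the core packages and print their versions.
import torch
import deepspeed
import bitsandbytes
import mmengine
import xtuner

print("torch:", torch.__version__)                # expected 2.1.0 (DTK build)
print("deepspeed:", deepspeed.__version__)        # expected 0.12.3
print("bitsandbytes:", bitsandbytes.__version__)  # expected 0.43.0
print("mmengine:", mmengine.__version__)          # expected 0.10.3
print("xtuner:", xtuner.__version__)              # expected 0.1.18
```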
## Dataset
```
├── llama3_pytorch
│   ├── datasets
│   │   ├── alpaca_data.json
│   │   └── multi_turn_dataset_2.json
```
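As a quick check that the data sits where the config expects it, the sketch below loads the file the same way the xtuner config does (`load_dataset(path='json', data_files=dict(train=data_path))`). The path is the default from the config and is an assumption for your setup; Alpaca-format records typically carry `instruction`, `input`, and `output` fields:
```python
# Minimal sketch: load the training json the same way the xtuner config does.
from datasets import load_dataset

data_path = "/home/llama3/datasets/alpaca_data.json"  # adjust to your local path
ds = load_dataset("json", data_files={"train": data_path})

print(ds["train"].column_names)   # typically ['instruction', 'input', 'output']
print(ds["train"][0])             # inspect the first record
print(len(ds["train"]), "records")
```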
## Training
### Fine-tuning with xtuner
1. Download the pretrained model (to download a different model, edit `download_models.py`):
```bash
cd /your_code_path/llama3_pytorch
pip install modelscope
python download_models.py
mv ~/.cache/modelscope/hub/LLM-Research ./
```
2. In [llama3_8b_instruct_qlora_alpaca_e3_M.py](./llama3_8b_instruct_qlora_alpaca_e3_M.py), set `pretrained_model_name_or_path` and `data_path` to your local model and data paths;
3. Adjust `max_length`, `batch_size`, `accumulative_counts`, `max_epochs`, `lr`, `save_steps`, `evaluation_freq`, and the `r`/`lora_alpha` values in `model.lora` to match your hardware and training needs; the default settings fit 4 × 32 GB DCUs (a reference excerpt of these parameters follows this list);
4. Set `${DCU_NUM}` to the number of DCU cards to use. When switching to a different dataset, also update the `SYSTEM`, `evaluation_inputs`, `dataset_map_fn`, `train_dataloader.sampler`, and `train_cfg` settings in llama3_8b_instruct_qlora_alpaca_e3_M.py (see the comments in the code); the alpaca dataset is the default;
5. Run:
```bash
bash finetune.sh
# or
NPROC_PER_NODE=${DCU_NUM} xtuner train ./llama3_8b_instruct_qlora_alpaca_e3_M.py --deepspeed deepspeed_zero2
```
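For orientation, the snippet below gathers the settings that steps 2–4 refer to. The values mirror the defaults shown in the config diff further down; this is only a reference excerpt, not a complete config:
```python
# Excerpt of the parameters referenced in steps 2-4 (defaults from this commit).
pretrained_model_name_or_path = '/home/llama3/Meta-Llama-3-8B-Instruct'  # step 2: local model path
data_path = '/home/llama3/datasets/alpaca_data.json'                     # step 2: local data path

max_length = 2048          # step 3
batch_size = 1             # step 3: per-device batch size
accumulative_counts = 16   # step 3: gradient accumulation steps
max_epochs = 3             # step 3

# step 3: LoRA settings inside model.lora -- r=64, lora_alpha=16, lora_dropout=0.1
# step 4: dataset-specific switches (SYSTEM, evaluation_inputs, dataset_map_fn,
#         train_dataloader.sampler, train_cfg) -- see the comments in the config below.
```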
...
download_models.py:
from modelscope import snapshot_download
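# download Meta-Llama-3-8B-Instruct into the local modelscope cache (~/.cache/modelscope/hub by default)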
model_dir = snapshot_download('LLM-Research/Meta-Llama-3-8B-Instruct')
print(model_dir)
finetune.sh:
#!/bin/bash
echo "Export params ..."
export HIP_VISIBLE_DEVICES=1,2
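# NOTE: the number of devices listed above should match NPROC_PER_NODE below (2 here)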
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
echo "starting finetune llama3 ..."
NPROC_PER_NODE=2 xtuner train ./llama3_8b_instruct_qlora_alpaca_e3_M.py --deepspeed deepspeed_zero2
llama3_8b_instruct_qlora_alpaca_e3_M.py:
@@ -28,7 +28,7 @@ pretrained_model_name_or_path = '/home/llama3/Meta-Llama-3-8B-Instruct'
use_varlen_attn = False  # new
# Data
data_path = '/home/llama3/datasets/alpaca_data.json'
prompt_template = PROMPT_TEMPLATE.llama3_chat
max_length = 2048
pack_to_max_length = True
@@ -37,8 +37,8 @@ pack_to_max_length = True
sequence_parallel_size = 1
# Scheduler & Optimizer
batch_size = 1  # per_device
accumulative_counts = 16
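# effective global batch size = batch_size * accumulative_counts * number of DCUs
# (e.g. 1 * 16 * 4 = 64 with sequence_parallel_size = 1)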
accumulative_counts *= sequence_parallel_size
dataloader_num_workers = 0
max_epochs = 3
@@ -55,16 +55,18 @@ save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
# Evaluate the generation performance during the training
evaluation_freq = 500
# alpaca data
SYSTEM = SYSTEM_TEMPLATE.alpaca
evaluation_inputs = [
    '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
]
# Emo multi_turn_dataset_2.json data
# SYSTEM = "你由EmoLLM团队打造的中文领域心理健康助手, 是一个研究过无数具有心理健康问题的病人与心理健康医生对话的心理专家, 在心理方面拥有广博的知识储备和丰富的研究咨询经验,接下来你将只使用中文来回答和咨询问题。"
# evaluation_inputs = [
#     '我最近总是感到很焦虑,尤其是在学业上。我有个特别崇拜的同学,他好像在各方面都比我优秀,我总觉得自己怎么努力也追不上他,这让我压力特别大。',
#     '我知道应该理性看待,但就是忍不住会去比较。我甚至晚上会因为这个睡不着觉,总想着怎样才能像他那样出色。',
#     '我今天心情不好,感觉不开心,很烦。'
# ]
#######################################################################
# PART 2 Model & Tokenizer #
@@ -83,20 +85,11 @@ model = dict(
        pretrained_model_name_or_path=pretrained_model_name_or_path,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        ),
    lora=dict(
        type=LoraConfig,
        r=64,  # 32
        lora_alpha=16,  # 64
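        # the LoRA update is scaled by lora_alpha / r (16 / 64 = 0.25 with these defaults)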
        lora_dropout=0.1,
        bias='none',
        task_type='CAUSAL_LM'))
@@ -105,12 +98,11 @@ model = dict(
#######################################################################
alpaca_en = dict(
    type=process_hf_dataset,
    dataset=dict(type=load_dataset, path='json', data_files=dict(train=data_path)),
    tokenizer=tokenizer,
    max_length=max_length,
    dataset_map_fn=alpaca_map_fn,  # alpaca data
    # dataset_map_fn=None,  # multi_turn_dataset_2.json data
    template_map_fn=dict(
        type=template_map_fn_factory, template=prompt_template),
    remove_unused_columns=True,
@@ -124,8 +116,8 @@ train_dataloader = dict(
    batch_size=batch_size,
    num_workers=dataloader_num_workers,
    dataset=alpaca_en,
    sampler=dict(type=sampler, shuffle=True),  # alpaca data
    # sampler=dict(type=DefaultSampler, shuffle=True),  # multi_turn_dataset_2.json data
    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
#######################################################################
@@ -161,8 +153,8 @@ param_scheduler = [
]
# train, val, test setting
train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)  # alpaca data
# train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1)  # multi_turn_dataset_2.json data
#######################################################################
# PART 5 Runtime #
...
@@ -5,16 +5,16 @@ export HIP_VISIBLE_DEVICES=0  # change to the card ID(s) and number of cards to use
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
echo "Start ..."
# Meta-Llama-3-8B model
torchrun --nproc_per_node 1 example_text_completion.py \
--ckpt_dir Meta-Llama-3-8B/original/ \
--tokenizer_path Meta-Llama-3-8B/original/tokenizer.model \
--max_seq_len 128 --max_batch_size 4
# Meta-Llama-3-8B-Instruct model
# torchrun --nproc_per_node 1 example_chat_completion.py \
# --ckpt_dir ./Meta-Llama-3-8B-Instruct/original/ \
# --tokenizer_path ./Meta-Llama-3-8B-Instruct/original/tokenizer.model \
# --max_seq_len 512 --max_batch_size 6