Commit 8293100a authored by luopl's avatar luopl
Browse files

update to 0.9.2.dev0

parent 2778a3d0
...@@ -12,6 +12,7 @@ FORCE_CHECK_IMPORTS= ...@@ -12,6 +12,7 @@ FORCE_CHECK_IMPORTS=
LLAMAFACTORY_VERBOSITY= LLAMAFACTORY_VERBOSITY=
USE_MODELSCOPE_HUB= USE_MODELSCOPE_HUB=
USE_OPENMIND_HUB= USE_OPENMIND_HUB=
USE_RAY=
RECORD_VRAM= RECORD_VRAM=
# torchrun # torchrun
FORCE_TORCHRUN= FORCE_TORCHRUN=
......
...@@ -171,3 +171,5 @@ config/ ...@@ -171,3 +171,5 @@ config/
saves/ saves/
output/ output/
wandb/ wandb/
swanlog/
generated_predictions.jsonl
assets/wechat.jpg

165 KB | W: | H:

assets/wechat.jpg

164 KB | W: | H:

assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
  • 2-up
  • Swipe
  • Onion skin
assets/wechat_npu.jpg

167 KB | W: | H:

assets/wechat_npu.jpg

167 KB | W: | H:

assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
  • 2-up
  • Swipe
  • Onion skin
...@@ -296,6 +296,14 @@ ...@@ -296,6 +296,14 @@
"response": "answer" "response": "answer"
} }
}, },
"openo1_sft": {
"hf_hub_url": "llamafactory/OpenO1-SFT",
"ms_hub_url": "llamafactory/OpenO1-SFT",
"columns": {
"prompt": "prompt",
"response": "response"
}
},
"llava_1k_en": { "llava_1k_en": {
"hf_hub_url": "BUAADreamer/llava-en-zh-2k", "hf_hub_url": "BUAADreamer/llava-en-zh-2k",
"subset": "en", "subset": "en",
...@@ -426,7 +434,7 @@ ...@@ -426,7 +434,7 @@
} }
}, },
"dpo_mix_en": { "dpo_mix_en": {
"hf_hub_url": "hiyouga/DPO-En-Zh-20k", "hf_hub_url": "llamafactory/DPO-En-Zh-20k",
"subset": "en", "subset": "en",
"ranking": true, "ranking": true,
"formatting": "sharegpt", "formatting": "sharegpt",
...@@ -437,7 +445,7 @@ ...@@ -437,7 +445,7 @@
} }
}, },
"dpo_mix_zh": { "dpo_mix_zh": {
"hf_hub_url": "hiyouga/DPO-En-Zh-20k", "hf_hub_url": "llamafactory/DPO-En-Zh-20k",
"subset": "zh", "subset": "zh",
"ranking": true, "ranking": true,
"formatting": "sharegpt", "formatting": "sharegpt",
......
...@@ -13,6 +13,8 @@ Make sure to execute these commands in the `LLaMA-Factory` directory. ...@@ -13,6 +13,8 @@ Make sure to execute these commands in the `LLaMA-Factory` directory.
Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computing devices. Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computing devices.
By default, LLaMA-Factory uses all visible computing devices.
## Examples ## Examples
### LoRA Fine-Tuning ### LoRA Fine-Tuning
...@@ -80,12 +82,6 @@ llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ...@@ -80,12 +82,6 @@ llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
``` ```
#### Batch Predicting and Computing BLEU and ROUGE Scores
```bash
llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
```
#### Supervised Fine-Tuning on Multiple Nodes #### Supervised Fine-Tuning on Multiple Nodes
```bash ```bash
...@@ -99,6 +95,12 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 ...@@ -99,6 +95,12 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
``` ```
#### Supervised Fine-Tuning with Ray on 4 GPUs
```bash
USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml
```
### QLoRA Fine-Tuning ### QLoRA Fine-Tuning
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended) #### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended)
...@@ -107,6 +109,12 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3. ...@@ -107,6 +109,12 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
``` ```
#### Supervised Fine-Tuning with 4-bit Bitsandbytes Quantization on Ascend NPU
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
```
#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization #### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization
```bash ```bash
...@@ -130,14 +138,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml ...@@ -130,14 +138,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
#### Supervised Fine-Tuning on Single Node #### Supervised Fine-Tuning on Single Node
```bash ```bash
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
``` ```
#### Supervised Fine-Tuning on Multiple Nodes #### Supervised Fine-Tuning on Multiple Nodes
```bash ```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
``` ```
#### Multimodal Supervised Fine-Tuning #### Multimodal Supervised Fine-Tuning
...@@ -146,12 +154,6 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama ...@@ -146,12 +154,6 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml
``` ```
#### Batch Predicting and Computing BLEU and ROUGE Scores
```bash
llamafactory-cli train examples/train_full/llama3_full_predict.yaml
```
### Merging LoRA Adapters and Quantization ### Merging LoRA Adapters and Quantization
#### Merge LoRA Adapters #### Merge LoRA Adapters
...@@ -170,13 +172,19 @@ llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ...@@ -170,13 +172,19 @@ llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
### Inferring LoRA Fine-Tuned Models ### Inferring LoRA Fine-Tuned Models
#### Use CLI #### Batch Generation using vLLM Tensor Parallel
```bash
python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
```
#### Use CLI ChatBox
```bash ```bash
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
``` ```
#### Use Web UI #### Use Web UI ChatBox
```bash ```bash
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
...@@ -196,6 +204,12 @@ llamafactory-cli api examples/inference/llama3_lora_sft.yaml ...@@ -196,6 +204,12 @@ llamafactory-cli api examples/inference/llama3_lora_sft.yaml
llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
``` ```
#### Full-Parameter Fine-Tuning using APOLLO
```bash
llamafactory-cli train examples/extras/apollo/llama3_full_sft.yaml
```
#### Full-Parameter Fine-Tuning using BAdam #### Full-Parameter Fine-Tuning using BAdam
```bash ```bash
...@@ -238,3 +252,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ...@@ -238,3 +252,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```bash ```bash
bash examples/extras/fsdp_qlora/train.sh bash examples/extras/fsdp_qlora/train.sh
``` ```
#### Computing BLEU and ROUGE Scores
```bash
llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
```
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
使用 `CUDA_VISIBLE_DEVICES`(GPU)或 `ASCEND_RT_VISIBLE_DEVICES`(NPU)选择计算设备。 使用 `CUDA_VISIBLE_DEVICES`(GPU)或 `ASCEND_RT_VISIBLE_DEVICES`(NPU)选择计算设备。
LLaMA-Factory 默认使用所有可见的计算设备。
## 示例 ## 示例
### LoRA 微调 ### LoRA 微调
...@@ -80,12 +82,6 @@ llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ...@@ -80,12 +82,6 @@ llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
``` ```
#### 批量预测并计算 BLEU 和 ROUGE 分数
```bash
llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
```
#### 多机指令监督微调 #### 多机指令监督微调
```bash ```bash
...@@ -99,6 +95,12 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 ...@@ -99,6 +95,12 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
``` ```
#### 使用 Ray 在 4 张 GPU 上微调
```bash
USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml
```
### QLoRA 微调 ### QLoRA 微调
#### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐) #### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐)
...@@ -107,6 +109,12 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3. ...@@ -107,6 +109,12 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
``` ```
#### 在 NPU 上基于 4 比特 Bitsandbytes 量化进行指令监督微调
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
```
#### 基于 4/8 比特 GPTQ 量化进行指令监督微调 #### 基于 4/8 比特 GPTQ 量化进行指令监督微调
```bash ```bash
...@@ -130,14 +138,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml ...@@ -130,14 +138,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
#### 在单机上进行指令监督微调 #### 在单机上进行指令监督微调
```bash ```bash
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
``` ```
#### 在多机上进行指令监督微调 #### 在多机上进行指令监督微调
```bash ```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
``` ```
#### 多模态指令监督微调 #### 多模态指令监督微调
...@@ -146,12 +154,6 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama ...@@ -146,12 +154,6 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml
``` ```
#### 批量预测并计算 BLEU 和 ROUGE 分数
```bash
llamafactory-cli train examples/train_full/llama3_full_predict.yaml
```
### 合并 LoRA 适配器与模型量化 ### 合并 LoRA 适配器与模型量化
#### 合并 LoRA 适配器 #### 合并 LoRA 适配器
...@@ -170,13 +172,19 @@ llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ...@@ -170,13 +172,19 @@ llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
### 推理 LoRA 模型 ### 推理 LoRA 模型
#### 使用命令行接口 #### 使用 vLLM+TP 批量推理
```bash
python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
```
#### 使用命令行对话框
```bash ```bash
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
``` ```
#### 使用浏览器界面 #### 使用浏览器对话框
```bash ```bash
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
...@@ -196,6 +204,12 @@ llamafactory-cli api examples/inference/llama3_lora_sft.yaml ...@@ -196,6 +204,12 @@ llamafactory-cli api examples/inference/llama3_lora_sft.yaml
llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
``` ```
#### 使用 APOLLO 进行全参数训练
```bash
llamafactory-cli train examples/extras/apollo/llama3_full_sft.yaml
```
#### 使用 BAdam 进行全参数训练 #### 使用 BAdam 进行全参数训练
```bash ```bash
...@@ -238,3 +252,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ...@@ -238,3 +252,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```bash ```bash
bash examples/extras/fsdp_qlora/train.sh bash examples/extras/fsdp_qlora/train.sh
``` ```
#### 计算 BLEU 和 ROUGE 分数
```bash
llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
```
### model ### model
model_name_or_path: Qwen/Qwen2-1.5B-Instruct model_name_or_path: Qwen/Qwen2-1.5B-Instruct
trust_remote_code: true
### method ### method
stage: sft stage: sft
......
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method
stage: sft
do_train: true
finetuning_type: full
use_apollo: true
apollo_layerwise: true # choices: [true, false], use false for DDP training
apollo_target: all
apollo_rank: 128
apollo_scale: 32.0
apollo_scale_type: channel
### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1 # use 1 for layerwise apollo
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
pure_bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
### model ### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method ### method
stage: sft stage: sft
......
### model ### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4 quantization_bit: 4
trust_remote_code: true
### method ### method
stage: sft stage: sft
......
### model ### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method ### method
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: full finetuning_type: full
use_galore: true use_galore: true
galore_layerwise: true galore_layerwise: true # choices: [true, false], use false for DDP training
galore_target: mlp,self_attn galore_target: all
galore_rank: 128 galore_rank: 128
galore_scale: 2.0 galore_scale: 2.0
...@@ -28,7 +29,7 @@ overwrite_output_dir: true ...@@ -28,7 +29,7 @@ overwrite_output_dir: true
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1 # use 1 for layerwise galore
learning_rate: 1.0e-5 learning_rate: 1.0e-5
num_train_epochs: 3.0 num_train_epochs: 3.0
lr_scheduler_type: cosine lr_scheduler_type: cosine
......
### model ### model
model_name_or_path: models/llama3-8b-pro model_name_or_path: models/llama3-8b-pro
trust_remote_code: true
### method ### method
stage: sft stage: sft
......
### model ### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method ### method
stage: sft stage: sft
......
### model ### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method ### method
stage: sft stage: sft
......
# The batch generation can be SLOW using this config.
# For faster inference, we recommend using `scripts/vllm_infer.py`.
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
trust_remote_code: true
### method
stage: sft
do_predict: true
finetuning_type: lora
### dataset
eval_dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/predict
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 1
predict_with_generate: true
ddp_timeout: 180000000
### model ### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method ### method
stage: sft stage: sft
......
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3 template: llama3
infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true
model_name_or_path: saves/llama3-8b/full/sft
template: llama3
infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft adapter_name_or_path: saves/llama3-8b/lora/sft
template: llama3 template: llama3
finetuning_type: lora infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment