Commit 8293100a authored by luopl's avatar luopl
Browse files

update to 0.9.2.dev0

parent 2778a3d0
......@@ -12,6 +12,7 @@ FORCE_CHECK_IMPORTS=
LLAMAFACTORY_VERBOSITY=
USE_MODELSCOPE_HUB=
USE_OPENMIND_HUB=
USE_RAY=
RECORD_VRAM=
# torchrun
FORCE_TORCHRUN=
......
......@@ -171,3 +171,5 @@ config/
saves/
output/
wandb/
swanlog/
generated_predictions.jsonl
assets/wechat.jpg

165 KB | W: | H:

assets/wechat.jpg

164 KB | W: | H:

assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
  • 2-up
  • Swipe
  • Onion skin
assets/wechat_npu.jpg

167 KB | W: | H:

assets/wechat_npu.jpg

167 KB | W: | H:

assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
  • 2-up
  • Swipe
  • Onion skin
......@@ -296,6 +296,14 @@
"response": "answer"
}
},
"openo1_sft": {
"hf_hub_url": "llamafactory/OpenO1-SFT",
"ms_hub_url": "llamafactory/OpenO1-SFT",
"columns": {
"prompt": "prompt",
"response": "response"
}
},
"llava_1k_en": {
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
"subset": "en",
......@@ -426,7 +434,7 @@
}
},
"dpo_mix_en": {
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
"hf_hub_url": "llamafactory/DPO-En-Zh-20k",
"subset": "en",
"ranking": true,
"formatting": "sharegpt",
......@@ -437,7 +445,7 @@
}
},
"dpo_mix_zh": {
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
"hf_hub_url": "llamafactory/DPO-En-Zh-20k",
"subset": "zh",
"ranking": true,
"formatting": "sharegpt",
......
......@@ -13,6 +13,8 @@ Make sure to execute these commands in the `LLaMA-Factory` directory.
Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computing devices.
By default, LLaMA-Factory uses all visible computing devices.
## Examples
### LoRA Fine-Tuning
......@@ -80,12 +82,6 @@ llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
```
#### Batch Predicting and Computing BLEU and ROUGE Scores
```bash
llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
```
#### Supervised Fine-Tuning on Multiple Nodes
```bash
......@@ -99,6 +95,12 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
```
#### Supervised Fine-Tuning with Ray on 4 GPUs
```bash
USE_RAY=1 llamafactory-cli train examples/train_full/llama3_lora_sft_ray.yaml
```
### QLoRA Fine-Tuning
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended)
......@@ -107,6 +109,12 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
```
#### Supervised Fine-Tuning with 4-bit Bitsandbytes Quantization on Ascend NPU
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
```
#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization
```bash
......@@ -130,14 +138,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
#### Supervised Fine-Tuning on Single Node
```bash
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
```
#### Supervised Fine-Tuning on Multiple Nodes
```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
```
#### Multimodal Supervised Fine-Tuning
......@@ -146,12 +154,6 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml
```
#### Batch Predicting and Computing BLEU and ROUGE Scores
```bash
llamafactory-cli train examples/train_full/llama3_full_predict.yaml
```
### Merging LoRA Adapters and Quantization
#### Merge LoRA Adapters
......@@ -170,13 +172,19 @@ llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
### Inferring LoRA Fine-Tuned Models
#### Use CLI
#### Batch Generation using vLLM Tensor Parallel
```
python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
```
#### Use CLI ChatBox
```bash
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
```
#### Use Web UI
#### Use Web UI ChatBox
```bash
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
......@@ -196,6 +204,12 @@ llamafactory-cli api examples/inference/llama3_lora_sft.yaml
llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
```
#### Full-Parameter Fine-Tuning using APOLLO
```bash
llamafactory-cli train examples/extras/apollo/llama3_full_sft.yaml
```
#### Full-Parameter Fine-Tuning using BAdam
```bash
......@@ -238,3 +252,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```bash
bash examples/extras/fsdp_qlora/train.sh
```
#### Computing BLEU and ROUGE Scores
```bash
llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
```
......@@ -13,6 +13,8 @@
使用 `CUDA_VISIBLE_DEVICES`(GPU)或 `ASCEND_RT_VISIBLE_DEVICES`(NPU)选择计算设备。
LLaMA-Factory 默认使用所有可见的计算设备。
## 示例
### LoRA 微调
......@@ -80,12 +82,6 @@ llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
```
#### 批量预测并计算 BLEU 和 ROUGE 分数
```bash
llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
```
#### 多机指令监督微调
```bash
......@@ -99,6 +95,12 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
```
#### 使用 Ray 在 4 张 GPU 上微调
```bash
USE_RAY=1 llamafactory-cli train examples/train_full/llama3_lora_sft_ray.yaml
```
### QLoRA 微调
#### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐)
......@@ -107,6 +109,12 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
```
#### 在 NPU 上基于 4 比特 Bitsandbytes 量化进行指令监督微调
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
```
#### 基于 4/8 比特 GPTQ 量化进行指令监督微调
```bash
......@@ -130,14 +138,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
#### 在单机上进行指令监督微调
```bash
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
```
#### 在多机上进行指令监督微调
```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
```
#### 多模态指令监督微调
......@@ -146,12 +154,6 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml
```
#### 批量预测并计算 BLEU 和 ROUGE 分数
```bash
llamafactory-cli train examples/train_full/llama3_full_predict.yaml
```
### 合并 LoRA 适配器与模型量化
#### 合并 LoRA 适配器
......@@ -170,13 +172,19 @@ llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
### 推理 LoRA 模型
#### 使用命令行接口
#### 使用 vLLM+TP 批量推理
```
python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
```
#### 使用命令行对话框
```bash
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
```
#### 使用浏览器界面
#### 使用浏览器对话框
```bash
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
......@@ -196,6 +204,12 @@ llamafactory-cli api examples/inference/llama3_lora_sft.yaml
llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
```
#### 使用 APOLLO 进行全参数训练
```bash
llamafactory-cli train examples/extras/apollo/llama3_full_sft.yaml
```
#### 使用 BAdam 进行全参数训练
```bash
......@@ -238,3 +252,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```bash
bash examples/extras/fsdp_qlora/train.sh
```
#### 计算 BLEU 和 ROUGE 分数
```bash
llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
```
### model
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
trust_remote_code: true
### method
stage: sft
......
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method
stage: sft
do_train: true
finetuning_type: full
use_apollo: true
apollo_layerwise: true # choices: [true, false], use false for DDP training
apollo_target: all
apollo_rank: 128
apollo_scale: 32.0
apollo_scale_type: channel
### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1 # use 1 for layerwise apollo
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
pure_bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method
stage: sft
......
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
trust_remote_code: true
### method
stage: sft
......
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method
stage: sft
do_train: true
finetuning_type: full
use_galore: true
galore_layerwise: true
galore_target: mlp,self_attn
galore_layerwise: true # choices: [true, false], use false for DDP training
galore_target: all
galore_rank: 128
galore_scale: 2.0
......@@ -28,7 +29,7 @@ overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
gradient_accumulation_steps: 1 # use 1 for layerwise galore
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
......
### model
model_name_or_path: models/llama3-8b-pro
trust_remote_code: true
### method
stage: sft
......
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method
stage: sft
......
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method
stage: sft
......
# The batch generation can be SLOW using this config.
# For faster inference, we recommend to use `scripts/vllm_infer.py`.
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
trust_remote_code: true
### method
stage: sft
do_predict: true
finetuning_type: lora
### dataset
eval_dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/predict
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 1
predict_with_generate: true
ddp_timeout: 180000000
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
### method
stage: sft
......
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3
infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true
model_name_or_path: saves/llama3-8b/full/sft
template: llama3
infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
template: llama3
finetuning_type: lora
infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment