Commit 4a40151b authored by chenych

Update v0.8.3

parent 731cf9b8
@@ -160,8 +160,6 @@ cython_debug/
.idea/
# custom .gitignore
ms_cache/
hf_cache/
cache/
config/
saves/
......
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.8"
sphinx:
configuration: docs/source/conf.py
formats:
- pdf
python:
install:
- requirements: docs/requirements-docs.txt
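For anyone reproducing this docs build locally, a hedged sketch that mirrors the Read the Docs configuration above (run from the repository root; `sphinx-build` comes from the requirements file):

```python
# Hedged sketch: build the documentation locally from the same Sphinx source
# tree that the Read the Docs config above points at.
import subprocess
import sys

subprocess.run([sys.executable, "-m", "pip", "install", "-r", "docs/requirements-docs.txt"], check=True)
subprocess.run(["sphinx-build", "-b", "html", "docs/source", "docs/_build/html"], check=True)
```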
Two collapsed file diffs are not shown here.
assets/wechat.jpg: image updated (122 KB → 142 KB)
assets/wechat_npu.jpg: image updated (193 KB → 147 KB)
@@ -266,13 +266,6 @@
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
"formatting": "sharegpt"
},
"magpie_ultra": {
"hf_hub_url": "argilla/magpie-ultra-v0.1",
"columns": {
"prompt": "instruction",
"response": "response"
}
},
"web_instruct": {
"hf_hub_url": "TIGER-Lab/WebInstructSub",
"columns": {
......
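The "columns" entries above map a dataset's own field names onto the loader's prompt/response keys. Below is a minimal illustration of that mapping with the `datasets` library, assuming the dataset exposes a train split with the listed fields; it is not the project's actual loading code.

```python
# Illustration only: mirror the "columns" mapping of the magpie_ultra entry by
# renaming the HF dataset's "instruction" field to the internal "prompt" key.
from datasets import load_dataset

ds = load_dataset("argilla/magpie-ultra-v0.1", split="train")  # requires network access
ds = ds.rename_column("instruction", "prompt")
print({key: ds[0][key] for key in ("prompt", "response")})
```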
@@ -189,12 +189,6 @@ llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
```
#### Full-Parameter Fine-Tuning using Adam-mini
```bash
llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml
```
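The same Adam-mini recipe can also be launched from a Python script by shelling out to the CLI shown above; a minimal sketch, assuming `llamafactory-cli` is on the PATH and the working directory is the repository root:

```python
# Hedged sketch: run the Adam-mini full-parameter SFT recipe programmatically.
import subprocess

subprocess.run(
    ["llamafactory-cli", "train", "examples/extras/adam_mini/qwen2_full_sft.yaml"],
    check=True,
)
```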
#### LoRA+ Fine-Tuning
```bash
......
We provide diverse examples of fine-tuning scripts for large language models.
Make sure to execute the following commands in the `llama_factory` directory.
Make sure to execute the following commands in the `LLaMA-Factory` directory.
## Table of Contents
@@ -11,7 +11,7 @@
- [Inferring LoRA Models](#推理-lora-模型)
- [Miscellaneous](#杂项)
Use `HIP_VISIBLE_DEVICES` to choose computation devices.
Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computation devices.
## Examples
@@ -189,12 +189,6 @@ llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
```
#### Full-Parameter Fine-Tuning using Adam-mini
```bash
llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml
```
#### LoRA+ Fine-Tuning
```bash
......
@@ -10,7 +10,6 @@ badam_mode: layer
badam_switch_mode: ascending
badam_switch_interval: 50
badam_verbose: 2
# deepspeed: examples/deepspeed/ds_z3_config.json
### dataset
dataset: identity,alpaca_en_demo
@@ -30,7 +29,7 @@ overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
......
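For scale, the two `learning_rate` values above apply to the effective batch size implied by the other settings in this config; a small arithmetic sketch (the device count is an assumption, not part of the YAML):

```python
# Hedged arithmetic: samples consumed per optimizer step for the settings above.
per_device_train_batch_size = 1
gradient_accumulation_steps = 8
num_devices = 8  # assumed device count, not specified in the config
print(per_device_train_batch_size * gradient_accumulation_steps * num_devices)  # 64
```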
@@ -29,12 +29,11 @@ overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 1.0e-5
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
pure_bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
......
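Under the usual reading of `pure_bf16: true` (parameters and compute kept in bfloat16 rather than fp32 weights under autocast mixed precision), the distinction looks roughly like this tiny PyTorch sketch; it is an interpretation of the flag, not code from this repository:

```python
# Hedged sketch: "pure" bf16 stores the parameters themselves in bfloat16.
import torch

layer = torch.nn.Linear(16, 16).to(torch.bfloat16)
x = torch.randn(4, 16, dtype=torch.bfloat16)
print(layer(x).dtype)  # torch.bfloat16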
@@ -2,5 +2,5 @@
python scripts/llama_pro.py \
--model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
--output_dir models/llama3-8b-pro \
--output_dir models/llama3-8b-instruct-pro \
--num_expand 8
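Per the LLaMA Pro recipe that scripts/llama_pro.py follows, `--num_expand 8` interleaves eight identity-initialized copies into the original layer stack. A hedged sketch of the resulting layout is below; the layer count of 32 is assumed for Meta-Llama-3-8B, and the script's actual initialization details are not shown in this diff.

```python
# Illustration of block expansion: one copied block after every num_layers/num_expand
# original layers, so a 32-layer model becomes 40 layers with 8 new trainable blocks.
num_layers, num_expand = 32, 8
group = num_layers // num_expand
layout = []
for i in range(num_layers):
    layout.append(f"orig_{i}")
    if (i + 1) % group == 0:
        layout.append(f"copy_of_{i}")  # new block, initialized to act as identity
print(len(layout), sum(name.startswith("copy") for name in layout))  # 40 8
```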
### model
model_name_or_path: models/llama3-8b-pro
model_name_or_path: models/llama3-8b-instruct-pro
### method
stage: sft
@@ -18,7 +18,7 @@ overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b-pro/freeze/sft
output_dir: saves/llama3-8b-instruct-pro/freeze/sft
logging_steps: 10
save_steps: 500
plot_loss: true
......
@@ -26,7 +26,7 @@ overwrite_output_dir: true
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
optim: paged_adamw_8bit
learning_rate: 1.0e-5
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
......
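`optim: paged_adamw_8bit` is the Trainer-level name for bitsandbytes' paged 8-bit AdamW; a rough sketch of what it resolves to, assuming bitsandbytes is installed and using a toy layer:

```python
# Hedged sketch: the paged 8-bit AdamW optimizer behind optim: paged_adamw_8bit.
# Constructing it works on CPU params, but taking a step requires CUDA tensors.
import torch
import bitsandbytes as bnb

layer = torch.nn.Linear(16, 16)
optimizer = bnb.optim.AdamW(layer.parameters(), lr=1.0e-4, optim_bits=8, is_paged=True)
```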
@@ -7,7 +7,7 @@ do_predict: true
finetuning_type: full
### dataset
eval_dataset: identity,alpaca_en_demo
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 50
......
@@ -25,7 +25,7 @@ overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
......
transformers>=4.41.2,<=4.43.4
datasets>=2.16.0,<=2.20.0
accelerate>=0.30.1,<=0.32.0
peft>=0.11.1,<=0.12.0
trl>=0.8.6,<=0.9.6
transformers>=4.41.2
datasets>=2.16.0
accelerate>=0.30.1
peft>=0.11.1
trl>=0.8.6
gradio>=4.0.0
pandas>=2.0.0
scipy
......
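For reference, a quick hedged way to check an existing environment against the minimum versions listed above (package names taken from the requirements file; this only prints versions for eyeballing rather than enforcing the ranges):

```python
# Sanity-check installed versions against the minimum pins listed above.
from importlib.metadata import version

minimums = {"transformers": "4.41.2", "datasets": "2.16.0", "accelerate": "0.30.1",
            "peft": "0.11.1", "trl": "0.8.6", "gradio": "4.0.0"}
for package, minimum in minimums.items():
    print(f"{package}: installed {version(package)}, requires >= {minimum}")
```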
@@ -36,11 +36,9 @@ def calculate_flops(
"""
with get_accelerator().device(0):
chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.engine.model.device)
fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
flops, macs, params = get_model_profile(
chat_model.engine.model, kwargs=input_dict, print_profile=True, detailed=True
)
flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
print("FLOPs:", flops)
print("MACs:", macs)
print("Params:", params)
......
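For completeness, a hedged usage sketch for this profiling script. It assumes the script exposes `calculate_flops` through a fire-style CLI (as the other scripts in this diff do) and that `batch_size`/`seq_length` are among its parameters, which the body above suggests but the hunk does not show in full; the model path is only an illustration.

```python
# Hedged sketch: invoke the FLOPs profiler from the repository root.
import subprocess

subprocess.run(
    [
        "python", "scripts/cal_flops.py",
        "--model_name_or_path", "meta-llama/Meta-Llama-3-8B-Instruct",  # illustrative model
        "--batch_size", "1",
        "--seq_length", "512",
    ],
    check=True,
)
```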
@@ -43,7 +43,7 @@ def calculate_lr(
dataset_dir: str = "data",
template: str = "default",
cutoff_len: int = 1024, # i.e. maximum input length during training
is_mistral_or_gemma: bool = False, # mistral and gemma models opt for a smaller learning rate,
is_mistral: bool = False, # mistral model uses a smaller learning rate,
packing: bool = False,
):
r"""
@@ -84,7 +84,7 @@ def calculate_lr(
valid_ratio = valid_tokens / total_tokens
batch_valid_len = batch_max_len * valid_ratio
lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS) # lr ~ sqrt(batch_size)
lr = lr / 6.0 if is_mistral_or_gemma else lr
lr = lr / 6.0 if is_mistral else lr
print(
"Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format(
lr, valid_ratio * 100, batch_valid_len
......
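A worked instance of the scaling rule above (lr grows with the square root of the valid tokens per optimizer step). The BASE_LR/BASE_BS values and the token counts below are placeholders for illustration; the script defines its own constants.

```python
# Worked example of lr = BASE_LR * sqrt(batch_valid_len / BASE_BS).
import math

BASE_LR, BASE_BS = 3.0e-4, 4_000_000   # placeholder reference point (tokens per step)
batch_max_len = 512 * 1024             # e.g. 512 sequences of 1,024 tokens per step
valid_ratio = 0.65                     # fraction of non-padding tokens (illustrative)
batch_valid_len = batch_max_len * valid_ratio
is_mistral = False                     # per the renamed flag above, Mistral divides lr by 6
lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)
lr = lr / 6.0 if is_mistral else lr
print(f"{lr:.2e}")
```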
@@ -19,7 +19,7 @@
import json
import os
from collections import OrderedDict
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional
import fire
import torch
@@ -47,8 +47,8 @@ def block_expansion(
model_name_or_path: str,
output_dir: str,
num_expand: int,
shard_size: str = "2GB",
save_safetensors: bool = True,
shard_size: Optional[str] = "2GB",
save_safetensors: Optional[bool] = False,
):
r"""
Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models.
......