Commit 1bfbcff0 authored by wanglch's avatar wanglch
Browse files

Initial commit

parents
Pipeline #1204 canceled with stages
git+https://github.com/modelscope/modelscope.git
# Fine-tune Stable Diffusion v2.1 with LoRA (rank 16, alpha 24) on the
# buptwq/lora-stable-diffusion-finetune dataset for 200 epochs, saving a
# checkpoint per epoch; generated samples use 30 inference steps.
PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/finetune_stable_diffusion.py \
    --model 'AI-ModelScope/stable-diffusion-v2-1' \
    --model_revision 'v1.0.1' \
    --prompt "a dog" \
    --work_dir './tmp/lora_diffusion' \
    --train_dataset_name 'buptwq/lora-stable-diffusion-finetune' \
    --max_epochs 200 \
    --lora_rank 16 \
    --lora_alpha 24 \
    --save_ckpt_strategy 'by_epoch' \
    --logging_interval 1 \
    --train.dataloader.workers_per_gpu 0 \
    --evaluation.dataloader.workers_per_gpu 0 \
    --train.optimizer.lr 1e-4 \
    --sample_nums 10 \
    --num_inference_steps 30 \
    --use_model_config true
# Fine-tune Stable Diffusion XL base 1.0 with LoRA (rank 16, alpha 32) on the
# same dataset for 100 epochs; otherwise mirrors the v2.1 LoRA recipe.
PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/finetune_stable_diffusion.py \
    --model 'AI-ModelScope/stable-diffusion-xl-base-1.0' \
    --model_revision 'v1.0.2' \
    --prompt "a dog" \
    --work_dir './tmp/lora_diffusion_xl' \
    --train_dataset_name 'buptwq/lora-stable-diffusion-finetune' \
    --max_epochs 100 \
    --lora_rank 16 \
    --lora_alpha 32 \
    --save_ckpt_strategy 'by_epoch' \
    --logging_interval 1 \
    --train.dataloader.workers_per_gpu 0 \
    --evaluation.dataloader.workers_per_gpu 0 \
    --train.optimizer.lr 1e-4 \
    --sample_nums 10 \
    --num_inference_steps 30 \
    --use_model_config true
-r requirements/framework.txt
decord
diffusers==0.25.0
einops
torchvision
docutils>=0.16.0
myst_parser
recommonmark
sphinx>=5.3.0
sphinx-book-theme
sphinx-copybutton
sphinx_markdown_tables
accelerate
aiohttp
binpacking
dacite
jieba
matplotlib
modelscope>=1.14
nltk
numpy
optimum>=1.17.0
pandas
peft>=0.11.0,<0.12.0
requests
rouge
safetensors
tensorboard
tqdm
transformers>=4.33,<4.42
transformers_stream_generator
trl>=0.8.2
charset_normalizer
cpm_kernels
fastapi
gradio>=3.40.0
sentencepiece
tiktoken
uvicorn
expecttest
flake8
isort>=4.3.21
modelscope
pre-commit
yapf==0.30.0 # use fix version to ensure consistent auto-styling
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "llama2-7b-aqlm-2bit-1x16",
"dataset": "dureader-robust-zh",
"batch_size": 1,
"max_length": 1024,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 1000,
"save_steps": 1000,
"train_dataset_sample": 100000,
"val_dataset_sample": 3000,
"num_train_epochs": 2,
"check_dataset_strategy": "none",
"gradient_checkpointing": true,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "llama2-7b-aqlm-2bit-1x16"
}
]
}
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "qwen1half-7b-chat-awq",
"dataset": "ms-agent",
"train_dataset_mix_ratio": 2.0,
"batch_size": 1,
"max_length": 2048,
"use_loss_scale": true,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 2000,
"save_steps": 2000,
"train_dataset_sample": -1,
"val_dataset_sample": 5000,
"num_train_epochs": 2,
"gradient_checkpointing": true,
"weight_decay": 0.01,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "qwen1half-7b-chat-awq"
}
]
}
{
"cmd": "dpo",
"requirements":{
"gpu": "8",
"ddp": "8"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "llama2-7b",
"ref_model_type": "llama2-7b",
"template_type": "llama",
"dataset": "hh-rlhf-cn",
"train_dataset_sample": 200000,
"truncation_strategy": "truncation_left",
"val_dataset_sample": 10000,
"num_train_epochs": 1,
"max_length": 1024,
"max_prompt_length": 512,
"check_dataset_strategy": "none",
"gradient_checkpointing": true,
"batch_size": 1,
"weight_decay": 0.01,
"learning_rate": 5e-5,
"gradient_accumulation_steps": 2,
"max_grad_norm": 1.0,
"warmup_ratio": 0.03,
"eval_steps": 2000,
"save_steps": 2000,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL EMBEDDING",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "dpolora",
"args": {
"sft_beta": 0.0
}
},
{
"name": "dpolora+sft_beta0.1",
"args": {
"sft_beta": 0.1
}
}
]
}
{
"cmd": "eval",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"args": {},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"experiment": [{
"name": "qwen-7b-chat-eval",
"args": {
"model_type": "qwen-7b-chat"
}
}]
}
{
"cmd": "export",
"args": {
"model_type": "qwen-7b-chat",
"quant_bits": 4
},
"requirements":{
"gpu": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"experiment": [
{
"name": "gptq-pileval",
"args": {
"quant_method": "gptq",
"dataset": "pileval"
}
},
{
"name": "gptq-ms-bench-mini",
"args": {
"quant_method": "gptq",
"dataset": "ms-bench-mini"
}
},
{
"name": "awq-pileval",
"args": {
"quant_method": "awq",
"dataset": "pileval"
}
},
{
"name": "awq-ms-bench-mini",
"args": {
"quant_method": "awq",
"dataset": "ms-bench-mini"
}
}
]
}
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "qwen1half-7b-chat-int8",
"dataset": "ms-agent",
"train_dataset_mix_ratio": 2.0,
"batch_size": 1,
"max_length": 2048,
"use_loss_scale": true,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 2000,
"save_steps": 2000,
"train_dataset_sample": -1,
"val_dataset_sample": 5000,
"num_train_epochs": 2,
"gradient_checkpointing": true,
"weight_decay": 0.01,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "qwen1half-7b-chat-int8"
}
]
}
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "qwen-7b-chat",
"dataset": "ms-agent",
"train_dataset_mix_ratio": 2.0,
"batch_size": 1,
"max_length": 2048,
"use_loss_scale": true,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 2000,
"save_steps": 2000,
"train_dataset_sample": -1,
"val_dataset_sample": 5000,
"num_train_epochs": 2,
"check_dataset_strategy": "none",
"gradient_checkpointing": true,
"weight_decay": 0.01,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10
},
"experiment": [
{
"name": "lora",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
}
},
{
"name": "lora+packing",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"eval_steps": 200,
"save_steps": 200
}
},
{
"name": "lora+packing+ddp",
"requirements":{
"gpu": "2",
"ddp": "2"
},
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"eval_steps": 100,
"save_steps": 100
}
},
{
"name": "lora+packing+lazytokenize",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"lazy_tokenize": true,
"eval_steps": 200,
"save_steps": 200
}
},
{
"name": "lora+",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"lora_lr_ratio": 16.0
}
},
{
"name": "rslora",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"use_rslora": true
}
},
{
"name": "dora",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"use_dora": true
}
},
{
"name": "lora+neftune",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"neftune_noise_alpha": 15.0
}
},
{
"name": "llamapro",
"args": {
"sft_type": "llamapro",
"llamapro_num_new_blocks": "4"
}
},
{
"name": "full",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full"
}
},
{
"name": "full+galore128",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "128",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "false"
}
},
{
"name": "full+galore64",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "64",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "false"
}
},
{
"name": "full+galore32",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "32",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "false"
}
},
{
"name": "full+galore_emb",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "128",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "true"
}
},
{
"name": "full+galore_perparam",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "128",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "true",
"galore_with_embedding": "false"
}
},
{
"name": "adalora",
"args": {
"sft_type": "adalora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
}
},
{
"name": "adapter",
"args": {
"sft_type": "adapter"
}
},
{
"name": "ia3",
"args": {
"sft_type": "ia3",
"ia3_target_modules": "ALL",
"ia3_feedforward_modules": "mlp.gate_proj mlp.up_proj mlp.down_proj"
}
},
{
"name": "lora+no_mix",
"info": "lora无混合数据集",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"train_dataset_mix_ratio": 0.0
}
},
{
"name": "full+lisa_2",
"info": "lisa 2layers + full",
"args": {
"sft_type": "full",
"lisa_activated_layers": 2,
"lisa_step_interval": 20
}
},
{
"name": "full+lisa_4",
"info": "lisa 4layers + full",
"args": {
"sft_type": "full",
"lisa_activated_layers": 4,
"lisa_step_interval": 20
}
},
{
"name": "full+no_mix",
"info": "全参无混合数据集",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"train_dataset_mix_ratio": 0.0
}
},
{
"name": "unsloth+lora+q4",
"info": "unsloth lora quantization bit 4",
"args": {
"sft_type": "lora",
"tuner_backend": "unsloth",
"quantization_bit": 4,
"model_type": "llama3-8b-instruct"
}
},
{
"name": "unsloth+full",
"info": "unsloth full",
"args": {
"sft_type": "full",
"tuner_backend": "unsloth",
"model_type": "llama3-8b-instruct"
}
}
]
}
# Copyright (c) Alibaba, Inc. and its affiliates.
import argparse
import os
import os.path
# NOTE(review): exp_utils and swift are project-local packages, not stdlib.
from exp_utils import ExpManager, find_all_config
from swift.utils import *
# Module-level logger; get_logger is presumably provided by the swift.utils
# star import — TODO confirm. `logger` is not used elsewhere in this file.
logger = get_logger()
def parse_args():
    """Parse the command line for a swift experiment run.

    Returns:
        argparse.Namespace with two attributes:
        ``config`` -- required; comma-separated experiment config files
        and/or directories.
        ``save_dir`` -- experiment output folder (default ``./experiment``).
    """
    parser = argparse.ArgumentParser(description='Simple args for swift experiments.')
    parser.add_argument(
        '--config',
        required=True,
        type=str,
        default=None,
        help='The experiment config file',
    )
    parser.add_argument(
        '--save_dir',
        required=False,
        type=str,
        default='./experiment',
        help='The experiment output folder',
    )
    return parser.parse_args()
def llm_exp():
    """Entry point: expand the configs named on the command line and run them.

    Reads ``--config`` (comma-separated files/directories) and ``--save_dir``
    via parse_args, ensures the output folder exists, expands every entry to
    the config files it contains via find_all_config, then hands the
    rewritten args (``args.config`` becomes a flat list of config paths) to
    ExpManager.begin.
    """
    args = parse_args()
    os.makedirs(args.save_dir, exist_ok=True)
    # --config is a required string, so split(',') always yields a non-empty
    # list; the original isinstance-list fallback after the split was
    # unreachable and has been removed.
    all_configs = []
    for dir_or_file in args.config.split(','):
        all_configs.extend(find_all_config(dir_or_file))
    args.config = all_configs
    exp_manager = ExpManager()
    exp_manager.begin(args)


if __name__ == '__main__':
    llm_exp()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment