Commit 1bfbcff0 authored by wanglch's avatar wanglch
Browse files

Initial commit

parents
Pipeline #1204 canceled with stages
git+https://github.com/modelscope/modelscope.git
# Fine-tune Stable Diffusion v2.1 with LoRA (rank 16, alpha 24) on the
# buptwq/lora-stable-diffusion-finetune dataset for 200 epochs, saving a
# checkpoint per epoch; generated samples use 30 inference steps.
PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/finetune_stable_diffusion.py \
    --model 'AI-ModelScope/stable-diffusion-v2-1' \
    --model_revision 'v1.0.1' \
    --prompt "a dog" \
    --work_dir './tmp/lora_diffusion' \
    --train_dataset_name 'buptwq/lora-stable-diffusion-finetune' \
    --max_epochs 200 \
    --lora_rank 16 \
    --lora_alpha 24 \
    --save_ckpt_strategy 'by_epoch' \
    --logging_interval 1 \
    --train.dataloader.workers_per_gpu 0 \
    --evaluation.dataloader.workers_per_gpu 0 \
    --train.optimizer.lr 1e-4 \
    --sample_nums 10 \
    --num_inference_steps 30 \
    --use_model_config true
# Fine-tune Stable Diffusion XL base 1.0 with LoRA (rank 16, alpha 32) on the
# same dataset for 100 epochs; otherwise mirrors the v2.1 LoRA recipe.
PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/finetune_stable_diffusion.py \
    --model 'AI-ModelScope/stable-diffusion-xl-base-1.0' \
    --model_revision 'v1.0.2' \
    --prompt "a dog" \
    --work_dir './tmp/lora_diffusion_xl' \
    --train_dataset_name 'buptwq/lora-stable-diffusion-finetune' \
    --max_epochs 100 \
    --lora_rank 16 \
    --lora_alpha 32 \
    --save_ckpt_strategy 'by_epoch' \
    --logging_interval 1 \
    --train.dataloader.workers_per_gpu 0 \
    --evaluation.dataloader.workers_per_gpu 0 \
    --train.optimizer.lr 1e-4 \
    --sample_nums 10 \
    --num_inference_steps 30 \
    --use_model_config true
-r requirements/framework.txt
decord
diffusers==0.25.0
einops
torchvision
docutils>=0.16.0
myst_parser
recommonmark
sphinx>=5.3.0
sphinx-book-theme
sphinx-copybutton
sphinx_markdown_tables
accelerate
aiohttp
binpacking
dacite
jieba
matplotlib
modelscope>=1.14
nltk
numpy
optimum>=1.17.0
pandas
peft>=0.11.0,<0.12.0
requests
rouge
safetensors
tensorboard
tqdm
transformers>=4.33,<4.42
transformers_stream_generator
trl>=0.8.2
charset_normalizer
cpm_kernels
fastapi
gradio>=3.40.0
sentencepiece
tiktoken
uvicorn
expecttest
flake8
isort>=4.3.21
modelscope
pre-commit
yapf==0.30.0 # use fix version to ensure consistent auto-styling
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "llama2-7b-aqlm-2bit-1x16",
"dataset": "dureader-robust-zh",
"batch_size": 1,
"max_length": 1024,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 1000,
"save_steps": 1000,
"train_dataset_sample": 100000,
"val_dataset_sample": 3000,
"num_train_epochs": 2,
"check_dataset_strategy": "none",
"gradient_checkpointing": true,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "llama2-7b-aqlm-2bit-1x16"
}
]
}
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "qwen1half-7b-chat-awq",
"dataset": "ms-agent",
"train_dataset_mix_ratio": 2.0,
"batch_size": 1,
"max_length": 2048,
"use_loss_scale": true,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 2000,
"save_steps": 2000,
"train_dataset_sample": -1,
"val_dataset_sample": 5000,
"num_train_epochs": 2,
"gradient_checkpointing": true,
"weight_decay": 0.01,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "qwen1half-7b-chat-awq"
}
]
}
{
"cmd": "dpo",
"requirements":{
"gpu": "8",
"ddp": "8"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "llama2-7b",
"ref_model_type": "llama2-7b",
"template_type": "llama",
"dataset": "hh-rlhf-cn",
"train_dataset_sample": 200000,
"truncation_strategy": "truncation_left",
"val_dataset_sample": 10000,
"num_train_epochs": 1,
"max_length": 1024,
"max_prompt_length": 512,
"check_dataset_strategy": "none",
"gradient_checkpointing": true,
"batch_size": 1,
"weight_decay": 0.01,
"learning_rate": 5e-5,
"gradient_accumulation_steps": 2,
"max_grad_norm": 1.0,
"warmup_ratio": 0.03,
"eval_steps": 2000,
"save_steps": 2000,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL EMBEDDING",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "dpolora",
"args": {
"sft_beta": 0.0
}
},
{
"name": "dpolora+sft_beta0.1",
"args": {
"sft_beta": 0.1
}
}
]
}
{
"cmd": "eval",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"args": {},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"experiment": [{
"name": "qwen-7b-chat-eval",
"args": {
"model_type": "qwen-7b-chat"
}
}]
}
{
"cmd": "export",
"args": {
"model_type": "qwen-7b-chat",
"quant_bits": 4
},
"requirements":{
"gpu": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"experiment": [
{
"name": "gptq-pileval",
"args": {
"quant_method": "gptq",
"dataset": "pileval"
}
},
{
"name": "gptq-ms-bench-mini",
"args": {
"quant_method": "gptq",
"dataset": "ms-bench-mini"
}
},
{
"name": "awq-pileval",
"args": {
"quant_method": "awq",
"dataset": "pileval"
}
},
{
"name": "awq-ms-bench-mini",
"args": {
"quant_method": "awq",
"dataset": "ms-bench-mini"
}
}
]
}
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "qwen1half-7b-chat-int8",
"dataset": "ms-agent",
"train_dataset_mix_ratio": 2.0,
"batch_size": 1,
"max_length": 2048,
"use_loss_scale": true,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 2000,
"save_steps": 2000,
"train_dataset_sample": -1,
"val_dataset_sample": 5000,
"num_train_epochs": 2,
"gradient_checkpointing": true,
"weight_decay": 0.01,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10,
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
},
"experiment": [
{
"name": "qwen1half-7b-chat-int8"
}
]
}
{
"cmd": "sft",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model_type": "qwen-7b-chat",
"dataset": "ms-agent",
"train_dataset_mix_ratio": 2.0,
"batch_size": 1,
"max_length": 2048,
"use_loss_scale": true,
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"use_flash_attn": true,
"eval_steps": 2000,
"save_steps": 2000,
"train_dataset_sample": -1,
"val_dataset_sample": 5000,
"num_train_epochs": 2,
"check_dataset_strategy": "none",
"gradient_checkpointing": true,
"weight_decay": 0.01,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10
},
"experiment": [
{
"name": "lora",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
}
},
{
"name": "lora+packing",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"eval_steps": 200,
"save_steps": 200
}
},
{
"name": "lora+packing+ddp",
"requirements":{
"gpu": "2",
"ddp": "2"
},
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"eval_steps": 100,
"save_steps": 100
}
},
{
"name": "lora+packing+lazytokenize",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"lazy_tokenize": true,
"eval_steps": 200,
"save_steps": 200
}
},
{
"name": "lora+",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"lora_lr_ratio": 16.0
}
},
{
"name": "rslora",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"use_rslora": true
}
},
{
"name": "dora",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"use_dora": true
}
},
{
"name": "lora+neftune",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"neftune_noise_alpha": 15.0
}
},
{
"name": "llamapro",
"args": {
"sft_type": "llamapro",
"llamapro_num_new_blocks": "4"
}
},
{
"name": "full",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full"
}
},
{
"name": "full+galore128",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "128",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "false"
}
},
{
"name": "full+galore64",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "64",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "false"
}
},
{
"name": "full+galore32",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "32",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "false"
}
},
{
"name": "full+galore_emb",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "128",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "false",
"galore_with_embedding": "true"
}
},
{
"name": "full+galore_perparam",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"use_galore": "true",
"galore_rank": "128",
"galore_update_proj_gap": "200",
"galore_optim_per_parameter": "true",
"galore_with_embedding": "false"
}
},
{
"name": "adalora",
"args": {
"sft_type": "adalora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32
}
},
{
"name": "adapter",
"args": {
"sft_type": "adapter"
}
},
{
"name": "ia3",
"args": {
"sft_type": "ia3",
"ia3_target_modules": "ALL",
"ia3_feedforward_modules": "mlp.gate_proj mlp.up_proj mlp.down_proj"
}
},
{
"name": "lora+no_mix",
"info": "lora无混合数据集",
"args": {
"sft_type": "lora",
"lora_target_modules": "ALL",
"lora_rank": 8,
"lora_alpha": 32,
"train_dataset_mix_ratio": 0.0
}
},
{
"name": "full+lisa_2",
"info": "lisa 2layers + full",
"args": {
"sft_type": "full",
"lisa_activated_layers": 2,
"lisa_step_interval": 20
}
},
{
"name": "full+lisa_4",
"info": "lisa 4layers + full",
"args": {
"sft_type": "full",
"lisa_activated_layers": 4,
"lisa_step_interval": 20
}
},
{
"name": "full+no_mix",
"info": "全参无混合数据集",
"requirements":{
"gpu": "1",
"ddp": "1"
},
"args": {
"sft_type": "full",
"train_dataset_mix_ratio": 0.0
}
},
{
"name": "unsloth+lora+q4",
"info": "unsloth lora quantization bit 4",
"args": {
"sft_type": "lora",
"tuner_backend": "unsloth",
"quantization_bit": 4,
"model_type": "llama3-8b-instruct"
}
},
{
"name": "unsloth+full",
"info": "unsloth full",
"args": {
"sft_type": "full",
"tuner_backend": "unsloth",
"model_type": "llama3-8b-instruct"
}
}
]
}
# Copyright (c) Alibaba, Inc. and its affiliates.
import argparse
import os
import os.path
# NOTE(review): exp_utils and swift are project-local packages, not stdlib.
from exp_utils import ExpManager, find_all_config
from swift.utils import *
# Module-level logger; get_logger is presumably provided by the swift.utils
# star import — TODO confirm. `logger` is not used elsewhere in this file.
logger = get_logger()
def parse_args():
    """Parse the command line for a swift experiment run.

    Returns:
        argparse.Namespace with two attributes:
        ``config`` -- required; comma-separated experiment config files
        and/or directories.
        ``save_dir`` -- experiment output folder (default ``./experiment``).
    """
    parser = argparse.ArgumentParser(description='Simple args for swift experiments.')
    parser.add_argument(
        '--config',
        required=True,
        type=str,
        default=None,
        help='The experiment config file',
    )
    parser.add_argument(
        '--save_dir',
        required=False,
        type=str,
        default='./experiment',
        help='The experiment output folder',
    )
    return parser.parse_args()
def llm_exp():
    """Entry point: expand the configs named on the command line and run them.

    Reads ``--config`` (comma-separated files/directories) and ``--save_dir``
    via parse_args, ensures the output folder exists, expands every entry to
    the config files it contains via find_all_config, then hands the
    rewritten args (``args.config`` becomes a flat list of config paths) to
    ExpManager.begin.
    """
    args = parse_args()
    os.makedirs(args.save_dir, exist_ok=True)
    # --config is a required string, so split(',') always yields a non-empty
    # list; the original isinstance-list fallback after the split was
    # unreachable and has been removed.
    all_configs = []
    for dir_or_file in args.config.split(','):
        all_configs.extend(find_all_config(dir_or_file))
    args.config = all_configs
    exp_manager = ExpManager()
    exp_manager.begin(args)


if __name__ == '__main__':
    llm_exp()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment