ModelZoo / GLM-4V_pytorch
Commit 1bfbcff0, authored Jun 13, 2024 by wanglch
Initial commit
Pipeline #1204 canceled with stages
Showing 20 changed files with 861 additions and 0 deletions.
swift-main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh  (+29)
swift-main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/infer.sh  (+12)
swift-main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh  (+33)
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_ddp/infer.sh  (+12)
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_ddp/sft.sh  (+41)
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_zero3/infer.sh  (+12)
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_zero3/sft.sh  (+43)
swift-main/examples/pytorch/llm/scripts/yi_vl_6b_chat/lora/infer.sh  (+12)
swift-main/examples/pytorch/llm/scripts/yi_vl_6b_chat/lora/sft.sh  (+31)
swift-main/examples/pytorch/llm/scripts/yuan2_2b_janus_instruct/infer.sh  (+9)
swift-main/examples/pytorch/llm/scripts/yuan2_2b_janus_instruct/sft.sh  (+16)
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh  (+12)
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/sft.sh  (+35)
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh  (+12)
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/sft.sh  (+41)
swift-main/examples/pytorch/multi_modal/notebook/resources/images/dog.jpeg  (binary)
swift-main/examples/pytorch/multi_modal/notebook/resources/images/flatillustration.jpeg  (binary)
swift-main/examples/pytorch/multi_modal/notebook/resources/images/lora.png  (binary)
swift-main/examples/pytorch/multi_modal/notebook/resources/images/sd.jpg  (binary)
swift-main/examples/pytorch/multi_modal/notebook/text_to_image_synthesis.ipynb  (+511)
swift-main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh  (new file, mode 100644)

# Experimental environment: A100
# 25GB GPU memory
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_sft.py \
    --model_type yi-6b-chat \
    --sft_type llamapro \
    --tuner_backend swift \
    --dtype AUTO \
    --output_dir output \
    --dataset ms-agent \
    --llamapro_num_new_blocks 4 \
    --use_loss_scale true \
    --train_dataset_sample 50000 \
    --train_dataset_mix_ratio 2.0 \
    --num_train_epochs 2 \
    --max_length 2048 \
    --check_dataset_strategy warning \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 500 \
    --save_steps 500 \
    --save_total_limit 2 \
    --logging_steps 10
swift-main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/infer.sh  (new file, mode 100644)

# Experimental environment: A10
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_infer.py \
    --ckpt_dir "output/yi-6b-chat/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --max_new_tokens 2048 \
    --temperature 0.7 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false
swift-main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh  (new file, mode 100644)

# Experimental environment: A10, A100
# 16GB GPU memory
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_sft.py \
    --model_type yi-6b-chat \
    --lora_lr_ratio 16.0 \
    --sft_type lora \
    --tuner_backend swift \
    --dtype AUTO \
    --output_dir output \
    --dataset ms-agent \
    --use_loss_scale true \
    --train_dataset_sample 50000 \
    --train_dataset_mix_ratio 2.0 \
    --num_train_epochs 2 \
    --max_length 2048 \
    --check_dataset_strategy warning \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 500 \
    --save_steps 500 \
    --save_total_limit 2 \
    --logging_steps 10
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_ddp/infer.sh  (new file, mode 100644)

# Experimental environment: A100
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --ckpt_dir "output/yi-9b/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --use_flash_attn true \
    --max_new_tokens 2048 \
    --temperature 0.3 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_ddp/sft.sh  (new file, mode 100644)

# Experimental environment: 4 * A100
# 4 * 30GB GPU memory
# Train a chat model with agent capabilities and self-cognition from the base.
nproc_per_node=4

CUDA_VISIBLE_DEVICES=0,1,2,3 \
NPROC_PER_NODE=$nproc_per_node \
swift sft \
    --model_type yi-9b \
    --sft_type lora \
    --tuner_backend peft \
    --template_type default \
    --dtype AUTO \
    --output_dir output \
    --dataset ms-agent \
    --train_dataset_sample 20000 \
    --train_dataset_mix_ratio 2 \
    --num_train_epochs 3 \
    --max_length 4096 \
    --check_dataset_strategy warning \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --lora_modules_to_save EMBEDDING LN \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 5e-5 \
    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10 \
    --use_flash_attn false \
    --self_cognition_sample 2000 \
    --model_name 小黄 'Xiao Huang' \
    --model_author 魔搭 ModelScope
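The `$(expr 16 / $nproc_per_node)` substitution in the DDP scripts keeps the effective batch size fixed while the GPU count varies. A minimal sketch of the arithmetic (the helper name is ours, not part of swift):

```python
# Effective batch size = per-device batch * grad-accum steps * world size.
# Dividing the accumulation steps by nproc_per_node keeps the product
# constant no matter how many GPUs run data parallel.
def effective_batch(batch_size: int, nproc_per_node: int, base_accum: int = 16) -> int:
    grad_accum = base_accum // nproc_per_node  # mirrors $(expr 16 / $nproc_per_node)
    return batch_size * grad_accum * nproc_per_node

print(effective_batch(1, 4))  # 16
print(effective_batch(1, 2))  # 16
```

With `batch_size 1` this reproduces the single-GPU effective batch of 16 on both the 2-GPU and 4-GPU scripts.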
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_zero3/infer.sh  (new file, mode 100644)

# Experimental environment: 2 * 3090
CUDA_VISIBLE_DEVICES=0,1 \
swift infer \
    --ckpt_dir "output/yi-9b/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --use_flash_attn true \
    --max_new_tokens 2048 \
    --temperature 0.3 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false
swift-main/examples/pytorch/llm/scripts/yi_9b/lora_zero3/sft.sh  (new file, mode 100644)

# Experimental environment: 4 * 3090
# 4 * 22GB GPU memory
# Train a chat model with agent capabilities and self-cognition from the base.
nproc_per_node=4

CUDA_VISIBLE_DEVICES=0,1,2,3 \
NPROC_PER_NODE=$nproc_per_node \
swift sft \
    --model_type yi-9b \
    --sft_type lora \
    --tuner_backend peft \
    --template_type default \
    --dtype AUTO \
    --output_dir output \
    --dataset ms-agent \
    --train_dataset_sample 20000 \
    --train_dataset_mix_ratio 2 \
    --num_train_epochs 3 \
    --max_length 4096 \
    --check_dataset_strategy warning \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --lora_modules_to_save EMBEDDING LN \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 5e-5 \
    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10 \
    --use_flash_attn false \
    --self_cognition_sample 2000 \
    --deepspeed default-zero3 \
    --model_name 小黄 'Xiao Huang' \
    --model_author 魔搭 ModelScope
swift-main/examples/pytorch/llm/scripts/yi_vl_6b_chat/lora/infer.sh  (new file, mode 100644)

# Experimental environment: V100, A10, 3090
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --ckpt_dir "output/yi-vl-6b-chat/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --use_flash_attn false \
    --max_new_tokens 2048 \
    --temperature 0.5 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false
swift-main/examples/pytorch/llm/scripts/yi_vl_6b_chat/lora/sft.sh  (new file, mode 100644)

# Experimental environment: V100, A10, 3090
# 18GB GPU memory
CUDA_VISIBLE_DEVICES=0 \
swift sft \
    --model_type yi-vl-6b-chat \
    --sft_type lora \
    --tuner_backend peft \
    --template_type AUTO \
    --dtype AUTO \
    --output_dir output \
    --dataset coco-en-2-mini \
    --train_dataset_sample -1 \
    --num_train_epochs 1 \
    --max_length 2048 \
    --check_dataset_strategy warning \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules DEFAULT \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10 \
    --use_flash_attn false
swift-main/examples/pytorch/llm/scripts/yuan2_2b_janus_instruct/infer.sh  (new file, mode 100644)

# Experimental environment: A10
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --ckpt_dir "output/yuan2-2b-janus-instruct/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --use_flash_attn false \
    --max_new_tokens 2048 \
    --do_sample false \
    --merge_lora false
swift-main/examples/pytorch/llm/scripts/yuan2_2b_janus_instruct/sft.sh  (new file, mode 100644)

# Experimental environment: A10
# 7GB GPU memory
CUDA_VISIBLE_DEVICES=0 \
swift sft \
    --model_type yuan2-2b-janus-instruct \
    --sft_type lora \
    --template_type AUTO \
    --dataset hc3-zh \
    --train_dataset_sample 20000 \
    --eval_steps 100 \
    --output_dir output \
    --num_train_epochs 1 \
    --max_length 2048 \
    --learning_rate 1e-4 \
    --use_flash_attn false \
    --lora_target_modules ALL
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh  (new file, mode 100644)

# Experimental environment: A10
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_infer.py \
    --ckpt_dir "output/ziya2-13b-chat/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --max_new_tokens 2048 \
    --temperature 0.7 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/sft.sh  (new file, mode 100644)

# Experimental environment: A10
# 12GB GPU memory
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_sft.py \
    --model_id_or_path Fengshenbang/Ziya2-13B-Chat \
    --model_revision master \
    --sft_type lora \
    --tuner_backend peft \
    --template_type AUTO \
    --dtype AUTO \
    --output_dir output \
    --dataset lawyer-llama-zh \
    --train_dataset_sample -1 \
    --num_train_epochs 1 \
    --max_length 2048 \
    --check_dataset_strategy warning \
    --quantization_bit 4 \
    --bnb_4bit_comp_dtype AUTO \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10
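The `# 12GB GPU memory` note in the QLoRA script is consistent with 4-bit quantization of the base weights (`--quantization_bit 4`). A rough back-of-envelope sketch, counting weights only; activations, the LoRA adapters, and their optimizer state take the rest:

```python
# Rough estimate of quantized base-weight memory for a 13B model.
# This counts only the 4-bit weight storage, not quantization
# constants, activations, or LoRA/optimizer state.
def quantized_weight_gib(n_params: float, bits: int) -> float:
    return n_params * bits / 8 / 2**30

base_gib = quantized_weight_gib(13e9, 4)
print(round(base_gib, 2))  # ~6.05
```

So the 4-bit base weights account for roughly half of the quoted 12 GB budget.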
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh  (new file, mode 100644)

# Experimental environment: A10
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python llm_infer.py \
    --ckpt_dir "output/ziya2-13b-chat/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --max_new_tokens 2048 \
    --temperature 0.7 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false
swift-main/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/sft.sh  (new file, mode 100644)

# Experimental environment: 2 * A10
# 2 * 13GB GPU memory
nproc_per_node=2

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0,1 \
torchrun \
    --nproc_per_node=$nproc_per_node \
    --master_port 29500 \
    llm_sft.py \
    --model_id_or_path Fengshenbang/Ziya2-13B-Chat \
    --model_revision master \
    --sft_type lora \
    --tuner_backend peft \
    --template_type AUTO \
    --dtype AUTO \
    --output_dir output \
    --ddp_backend nccl \
    --dataset lawyer-llama-zh \
    --train_dataset_sample -1 \
    --num_train_epochs 1 \
    --max_length 2048 \
    --check_dataset_strategy warning \
    --quantization_bit 4 \
    --bnb_4bit_comp_dtype AUTO \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10 \
    --deepspeed default-zero2
swift-main/examples/pytorch/multi_modal/notebook/resources/images/dog.jpeg  (new file, 75.1 KB)
swift-main/examples/pytorch/multi_modal/notebook/resources/images/flatillustration.jpeg  (new file, 926 KB)
swift-main/examples/pytorch/multi_modal/notebook/resources/images/lora.png  (new file, 421 KB)
swift-main/examples/pytorch/multi_modal/notebook/resources/images/sd.jpg  (new file, 415 KB)
swift-main/examples/pytorch/multi_modal/notebook/text_to_image_synthesis.ipynb  (new file, mode 100644)
{
"cells": [
{
"cell_type": "markdown",
"id": "a20d7d29-4c93-4901-9967-bcdfebb958dd",
"metadata": {},
"source": [
"# Applications of SWIFT in the multi-modal domain"
]
},
{
"cell_type": "markdown",
"id": "0b03d07c-b019-45ba-b06d-9ef5ddab30ce",
"metadata": {},
"source": [
"#### Usage examples: https://github.com/modelscope/swift/tree/main/examples/pytorch/multi_modal/notebook"
]
},
{
"cell_type": "markdown",
"id": "3e72ce3a-bf5a-45d4-9dc0-b1d17be4ec73",
"metadata": {},
"source": [
"## 1. Text-to-image generation"
]
},
{
"cell_type": "markdown",
"id": "6ad5a1bb-24e7-4281-9e8d-243ba2fa7ef9",
"metadata": {},
"source": [
"### 1.1 Install and import packages"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50c08154-69fb-4b2f-96d4-54ee9a4a8a4d",
"metadata": {},
"outputs": [],
"source": [
"# basic / third-party\n",
"import os\n",
"from matplotlib import pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# SWIFT\n",
"from swift import Swift, SwiftModel, snapshot_download, push_to_hub\n",
"from swift import AdapterConfig, LoRAConfig, PromptConfig, SideConfig, ResTuningConfig\n",
"\n",
"# Modelscope\n",
"import modelscope\n",
"from modelscope.pipelines import pipeline\n",
"from modelscope.models import Model\n",
"from modelscope.utils.config import Config\n",
"from modelscope.metainfo import Trainers\n",
"from modelscope.msdatasets import MsDataset\n",
"from modelscope.trainers import build_trainer\n",
"from modelscope.utils.constant import ModelFile\n"
]
},
{
"cell_type": "markdown",
"id": "b91d8889-1fda-4f68-bb17-b18e38f18009",
"metadata": {},
"source": [
"### 1.2 Stable Diffusion\n",
"\n",
"High-Resolution Image Synthesis with Latent Diffusion Models\n",
"\n",
"<img src=\"resources/images/sd.jpg\" width=\"800\" align=\"middle\" />"
]
},
{
"cell_type": "markdown",
"id": "79486a84-6f9a-4aff-b338-4365b0f9037a",
"metadata": {},
"source": [
"### 1.3 Subject-specific training"
]
},
{
"cell_type": "markdown",
"id": "2297501c-9f4c-471d-a455-037db929e150",
"metadata": {},
"source": [
"### 1.3.1 Dataset\n",
"- Dogs: https://modelscope.cn/datasets/buptwq/lora-stable-diffusion-finetune/summary\n",
"\n",
"<img src=\"resources/images/dog.jpeg\" width=\"200\" align=\"middle\" />\n",
"\n",
"- Load the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88880eb4-8d8d-4ca4-b9ac-5de6fa875e0a",
"metadata": {},
"outputs": [],
"source": [
"train_dataset = MsDataset.load(\n",
" 'buptwq/lora-stable-diffusion-finetune',\n",
" split='train',\n",
" ).remap_columns({'Text': 'prompt'})\n",
"eval_dataset = MsDataset.load(\n",
" 'buptwq/lora-stable-diffusion-finetune',\n",
" split='validation',\n",
" ).remap_columns({'Text': 'prompt'})"
]
},
{
"cell_type": "markdown",
"id": "997d9799-f05f-4b28-aaa4-628ba11e4551",
"metadata": {},
"source": [
"### 1.3.2 Load the SD model with modelscope"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15b214cf-0146-4bbe-b773-f8acbd2e1df5",
"metadata": {},
"outputs": [],
"source": [
"model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-base'\n",
"task = 'efficient-diffusion-tuning'\n",
"revision = 'v1.0.1'\n",
"\n",
"model_dir = snapshot_download(model_id)\n",
"cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
"cfg_dict.model.inference = False\n",
"model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)"
]
},
{
"cell_type": "markdown",
"id": "2ad596b0-add6-4050-a279-af07139484c7",
"metadata": {},
"source": [
"#### Inspect the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a0a0f53-b04b-4b57-ac51-767bcf5412f9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5532d110-ac97-4585-b22b-9c74db62680d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"module_keys = [key for key, _ in model.named_modules()]\n",
"print(module_keys)"
]
},
{
"cell_type": "markdown",
"id": "574e6445-5a4e-45d0-af87-496e66442d4b",
"metadata": {},
"source": [
"#### 1.3.3 Configure SwiftConfig and prepare the model"
]
},
{
"cell_type": "markdown",
"id": "3c847c3e-d27d-4639-8e4e-dcdd7b590ae6",
"metadata": {
"tags": []
},
"source": [
"#### Swift - Transformer - LoRA\n",
"\n",
"<img src=\"resources/images/lora.png\" width=\"500\" align=\"middle\" />"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05a27382-c660-4e54-9a2d-9598944d505e",
"metadata": {},
"outputs": [],
"source": [
"# unet\n",
"lora_config = LoRAConfig(\n",
" r=16,\n",
" target_modules=\".*unet.*.(to_q|to_k|to_v|to_out.0)$\"\n",
")\n",
"model = Swift.prepare_model(model, lora_config)"
]
},
{
"cell_type": "markdown",
"id": "4883fac3-ded7-4440-9954-8c74c3f8bc08",
"metadata": {},
"source": [
"#### 1.3.4 Inspect the fine-tuned model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "183cfa99-ff3c-4fd5-920e-1000c4590528",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(model)\n",
"print(model.get_trainable_parameters())"
]
},
{
"cell_type": "markdown",
"id": "8a46fac1-288a-4edd-a3d0-99cb5e961c5f",
"metadata": {},
"source": [
"#### 1.3.5 Training"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "599424c8-dd42-426c-a3e2-5d2383908e92",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def cfg_modify_fn(cfg):\n",
" cfg.preprocessor.resolution = 512\n",
" cfg.train.lr_scheduler = {\n",
" 'type': 'LambdaLR',\n",
" 'lr_lambda': lambda _: 1,\n",
" 'last_epoch': -1\n",
" }\n",
" cfg.train.max_epochs = 100\n",
" cfg.train.optimizer.lr = 1e-4\n",
" cfg.model.inference = False\n",
" cfg.model.pretrained_tuner = None\n",
" trainer_hook = cfg.train.hooks\n",
" trainer_hook.append({\"type\": \"SwiftHook\"})\n",
" cfg.train.hooks = trainer_hook\n",
" return cfg\n",
"\n",
"work_dir = \"tmp/multimodal_swift_lora_1\"\n",
"kwargs = dict(\n",
" model=model,\n",
" cfg_file=os.path.join(model_dir, 'configuration.json'),\n",
" work_dir=work_dir,\n",
" train_dataset=train_dataset,\n",
"    eval_dataset=eval_dataset,\n",
" cfg_modify_fn=cfg_modify_fn\n",
")\n",
"\n",
"trainer = build_trainer(name='efficient-diffusion-tuning', default_args=kwargs)\n",
"trainer.train()"
]
},
{
"cell_type": "markdown",
"id": "911494c9-0244-476d-adc9-4bfcddb4d6f8",
"metadata": {},
"source": [
"#### 1.3.6 Testing"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86bb91d5-7df3-4728-98b4-4869a3da5c9f",
"metadata": {},
"outputs": [],
"source": [
"# Load the modelscope base model\n",
"model_dir = snapshot_download(model_id)\n",
"cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
"cfg_dict.model.inference = True\n",
"model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)\n",
"\n",
"# Create a modelscope pipeline for inference\n",
"pipe = pipeline(task=\"efficient-diffusion-tuning\", model=model)\n",
"\n",
"# Run inference\n",
"test_prompt = \"a dog\"\n",
"img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0][:,:,::-1]\n",
"\n",
"# Display the image\n",
"plt.xticks([]), plt.yticks([])\n",
"plt.imshow(img_out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49eda830-fb29-422f-85f2-750e91c09f99",
"metadata": {},
"outputs": [],
"source": [
"# Load the Swift tuner weights\n",
"model = Swift.from_pretrained(model, os.path.join(work_dir, 'output_swift'))\n",
"\n",
"pipe = pipeline(task=\"efficient-diffusion-tuning\", model=model)\n",
"\n",
"test_prompt = \"a dog\"\n",
"img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0][:,:,::-1]\n",
"plt.xticks([]), plt.yticks([])\n",
"plt.imshow(img_out)"
]
},
{
"cell_type": "markdown",
"id": "85bdb758-190e-49e5-aad8-3a43702e9604",
"metadata": {},
"source": [
"### 1.4 Style-specific training"
]
},
{
"cell_type": "markdown",
"id": "9c46868d-60a1-4448-a875-9362cb8f5701",
"metadata": {},
"source": [
"### 1.4.1 Dataset\n",
"- Styles: https://modelscope.cn/datasets/damo/style_custom_dataset/summary\n",
"\n",
"<img src=\"resources/images/flatillustration.jpeg\" width=\"200\" align=\"middle\" />\n",
"\n",
"- Load the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4afe304d-a274-4b5b-b3aa-96cd0e05fcc8",
"metadata": {},
"outputs": [],
"source": [
"train_dataset = MsDataset.load(\n",
" 'style_custom_dataset',\n",
" namespace='damo',\n",
" split='train',\n",
" subset_name='Flatillustration'\n",
").remap_columns({'Image:FILE': 'target:FILE'})"
]
},
{
"cell_type": "markdown",
"id": "93090fc3-8d83-4064-a878-b5e826b5be4a",
"metadata": {},
"source": [
"### 1.4.2 Prepare the fine-tuned model with modelscope + Swift"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8ec7adb-fc8e-4f01-ad9f-ca4614aa802e",
"metadata": {},
"outputs": [],
"source": [
"model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-base'\n",
"task = 'efficient-diffusion-tuning'\n",
"revision = 'v1.0.1'\n",
"\n",
"model_dir = snapshot_download(model_id)\n",
"cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
"cfg_dict.model.inference = False\n",
"model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)\n",
"\n",
"# unet + text_encoder\n",
"lora_config = LoRAConfig(\n",
" r=128,\n",
" lora_alpha=128,\n",
" target_modules=\"(.*unet.*.(to_q|to_k|to_v|to_out.0)$)|(.*text_encoder.*.(q_proj|k_proj|v_proj|out_proj)$)\"\n",
")\n",
"model = Swift.prepare_model(model, lora_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a2ece30-12d5-4a78-b17e-c987af3fee44",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(model)\n",
"print(model.get_trainable_parameters())"
]
},
{
"cell_type": "markdown",
"id": "d704e2b5-5bb7-489d-b198-a80a1164aef1",
"metadata": {},
"source": [
"#### 1.4.3 Training"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "648e4b79-4014-4f05-94f6-bae6fddee2d0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def cfg_modify_fn(cfg):\n",
" cfg.preprocessor.resolution = 512\n",
" cfg.train.lr_scheduler = {\n",
" 'type': 'LambdaLR',\n",
" 'lr_lambda': lambda _: 1,\n",
" 'last_epoch': -1\n",
" }\n",
" cfg.train.max_epochs = 100\n",
" cfg.train.optimizer.lr = 1e-4\n",
" cfg.model.inference = False\n",
" cfg.model.pretrained_tuner = None\n",
" trainer_hook = cfg.train.hooks\n",
" trainer_hook.append({\"type\": \"SwiftHook\"})\n",
" cfg.train.hooks = trainer_hook\n",
" return cfg\n",
"\n",
"work_dir = \"tmp/multimodal_swift_lora_2\"\n",
"kwargs = dict(\n",
" model=model,\n",
" cfg_file=os.path.join(model_dir, 'configuration.json'),\n",
" work_dir=work_dir,\n",
" train_dataset=train_dataset,\n",
" cfg_modify_fn=cfg_modify_fn\n",
")\n",
"\n",
"trainer = build_trainer(name='efficient-diffusion-tuning', default_args=kwargs)\n",
"trainer.train()"
]
},
{
"cell_type": "markdown",
"id": "c56c2f36-5507-46f1-9c20-428c7fa367c5",
"metadata": {},
"source": [
"#### 1.4.4 Testing"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53c02514-6697-4153-86e8-22b123597cc8",
"metadata": {},
"outputs": [],
"source": [
"model_dir = snapshot_download(model_id)\n",
"cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))\n",
"cfg_dict.model.inference = True\n",
"model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)\n",
"model = Swift.from_pretrained(model, os.path.join(work_dir, 'output_swift'))\n",
"\n",
"pipe = pipeline(task=\"efficient-diffusion-tuning\", model=model)\n",
"\n",
"test_prompt = \"a dog\"\n",
"img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0][:,:,::-1]\n",
"plt.xticks([]), plt.yticks([])\n",
"plt.imshow(img_out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05e53c3d-2a24-4102-8925-d057ba871ecd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "aigc_env",
"language": "python",
"name": "aigc_env"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
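The `target_modules` regexes in the notebook's `LoRAConfig` cells select attention projections by module name. A small illustration of how the unet-only pattern matches, using hypothetical module names (real names depend on the Stable Diffusion implementation):

```python
import re

# The unet-only pattern from the notebook's LoRA configuration.
pattern = re.compile(r".*unet.*.(to_q|to_k|to_v|to_out.0)$")

# Hypothetical module names for illustration only.
names = [
    "unet.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q",
    "unet.up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0",
    "text_encoder.encoder.layers.0.self_attn.q_proj",  # not matched: no "unet"
]
matched = [n for n in names if pattern.search(n)]
print(matched)  # only the two unet projections
```

The combined pattern in section 1.4.2 extends this with a second alternative for `text_encoder` projections (`q_proj|k_proj|v_proj|out_proj`), so the style-training run adapts both the unet and the text encoder.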