import os
import cv2
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.trainers import build_trainer
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile
from swift import LoRAConfig, SCETuningConfig, Swift, snapshot_download
# load dataset
train_dataset = MsDataset.load(
    'style_custom_dataset', namespace='damo', subset_name='3D',
    split='train_short').remap_columns({'Image:FILE': 'Target:FILE'})
# load pretrained model
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-base'
task = 'efficient-diffusion-tuning'
revision = 'v1.0.1'
model_dir = snapshot_download(model_id)
cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))
cfg_dict.model.inference = False
model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)
# init tuner
tuner_type = 'scetuning'  # or 'lora'
if tuner_type == 'lora':
    work_dir = 'tmp/multimodal_swift_lora_style'
    tuner_config = LoRAConfig(r=64, target_modules='.*unet.*.(to_q|to_k|to_v|to_out.0|net.0.proj|net.2)$')
    model = Swift.prepare_model(model, tuner_config)
elif tuner_type == 'scetuning':
    work_dir = 'tmp/multimodal_swift_scetuning_style'
    tuner_config = SCETuningConfig(
        dims=[1280, 1280, 1280, 1280, 1280, 640, 640, 640, 320, 320, 320, 320],
        target_modules=r'^unet\.up_blocks\.\d+\.resnets\.\d+$')
    model = Swift.prepare_model(model, tuner_config)
else:
    raise ValueError(f'Unsupported tuner type: {tuner_type}')
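# Optional sanity check, a minimal sketch assuming the prepared model behaves like a
# standard torch.nn.Module (which the SWIFT-wrapped model normally does): report how
# many parameters the tuner left trainable versus the frozen base.
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in model.parameters())
print(f'trainable params: {n_trainable}/{n_total} ({100 * n_trainable / n_total:.2f}%)')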
# training
def cfg_modify_fn(cfg):
    cfg.preprocessor.resolution = 512
    cfg.train.lr_scheduler = {'type': 'LambdaLR', 'lr_lambda': lambda _: 1, 'last_epoch': -1}
    cfg.train.max_epochs = 100
    cfg.train.optimizer.lr = 1e-4
    cfg.train.dataloader.batch_size_per_gpu = 10
    cfg.model.inference = False
    cfg.model.pretrained_tuner = None
    trainer_hook = cfg.train.hooks
    trainer_hook.append({'type': 'SwiftHook'})
    trainer_hook.append({'type': 'CheckpointHook', 'interval': 50})
    cfg.train.hooks = trainer_hook
    return cfg
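# Note: cfg_modify_fn is passed to build_trainer below and is applied to the config
# loaded from cfg_file when the trainer initializes, so these overrides take effect
# without editing configuration.json on disk.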
kwargs = dict(
    model=model,
    cfg_file=os.path.join(model_dir, 'configuration.json'),
    work_dir=work_dir,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
    cfg_modify_fn=cfg_modify_fn)
trainer = build_trainer(name='efficient-diffusion-tuning', default_args=kwargs)
trainer.train()
# inference: load the tuner weights saved during training under <work_dir>/output_swift
work_dir = os.path.join(work_dir, 'output_swift')
model_dir = snapshot_download(model_id)
cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))
cfg_dict.model.inference = True
model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)
model = Swift.from_pretrained(model, work_dir)
pipe = pipeline(task='efficient-diffusion-tuning', model=model)
test_prompt = 'A boy in a camouflage jacket with a scarf'
img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0]
cv2.imwrite(os.path.join(work_dir, 'inference.png'), img_out)
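# Optional: sweep a few seeds for a quick visual comparison; this reuses the exact
# pipeline call from above, only the loop and the output file names are new.
for seed in (0, 42, 2023):
    out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=seed)['output_imgs'][0]
    cv2.imwrite(os.path.join(work_dir, f'inference_seed{seed}.png'), out)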
# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_controlnet

if __name__ == '__main__':
    infer_controlnet()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_controlnet_sdxl

if __name__ == '__main__':
    infer_controlnet_sdxl()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_dreambooth

if __name__ == '__main__':
    infer_dreambooth()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_dreambooth_lora

if __name__ == '__main__':
    infer_dreambooth_lora()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_dreambooth_lora_sdxl

if __name__ == '__main__':
    infer_dreambooth_lora_sdxl()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image

if __name__ == '__main__':
    infer_text_to_image()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image_lora

if __name__ == '__main__':
    infer_text_to_image_lora()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image_lora_sdxl

if __name__ == '__main__':
    infer_text_to_image_lora_sdxl()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image_sdxl

if __name__ == '__main__':
    infer_text_to_image_sdxl()
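# The Python stubs above are thin CLI entry points from swift.aigc; the shell commands
# below show how each one is invoked (model IDs, checkpoint directories and output file
# names are example values).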
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_controlnet.py \
  --base_model_path "AI-ModelScope/stable-diffusion-v1-5" \
  --controlnet_path "train_controlnet" \
  --prompt "pale golden rod circle with old lace background" \
  --control_image_path "conditioning_image_1.png" \
  --image_save_path "output.png" \
  --torch_dtype "fp16" \
  --seed 0

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_controlnet_sdxl.py \
  --base_model_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --controlnet_path "train_controlnet_sdxl" \
  --prompt "pale golden rod circle with old lace background" \
  --control_image_path "conditioning_image_1.png" \
  --image_save_path "output.png" \
  --torch_dtype "fp16" \
  --seed 0

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_dreambooth.py \
  --model_path "train_dreambooth" \
  --prompt "A photo of sks dog in a bucket" \
  --image_save_path "dog-bucket.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_dreambooth_lora.py \
  --base_model_path "AI-ModelScope/stable-diffusion-v1-5" \
  --lora_model_path "train_dreambooth_lora" \
  --prompt "A picture of a sks dog in a bucket" \
  --image_save_path "dog-bucket.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_dreambooth_lora_sdxl.py \
  --base_model_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --lora_model_path "train_dreambooth_lora_sdxl" \
  --prompt "A picture of a sks dog in a bucket" \
  --image_save_path "sks_dog.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-v1-5" \
  --unet_model_path "train_text_to_image/checkpoint-15000/unet" \
  --prompt "yoda" \
  --image_save_path "yoda-pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image_lora.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-v1-5" \
  --lora_model_path "train_text_to_image_lora/checkpoint-80000" \
  --prompt "A pokemon with green eyes and red legs." \
  --image_save_path "lora_pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image_lora_sdxl.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --lora_model_path "train_text_to_image_lora_sdxl/unet" \
  --prompt "A pokemon with green eyes and red legs." \
  --image_save_path "sdxl_lora_pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image_sdxl.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --unet_model_path "train_text_to_image_sdxl/checkpoint-10000/unet" \
  --prompt "A pokemon with green eyes and red legs." \
  --image_save_path "sdxl_pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path="AI-ModelScope/stable-diffusion-v1-5" \
  --output_dir="train_controlnet" \
  --dataset_name="AI-ModelScope/controlnet_dataset_condition_fill50k" \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
  --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
  --train_batch_size=4