import os
import cv2
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.pipelines import pipeline
from modelscope.trainers import build_trainer
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile
from swift import LoRAConfig, SCETuningConfig, Swift, snapshot_download
# load dataset
train_dataset = MsDataset.load(
    'style_custom_dataset', namespace='damo', subset_name='3D',
    split='train_short').remap_columns({'Image:FILE': 'Target:FILE'})
# load pretrained model
model_id = 'damo/multi-modal_efficient-diffusion-tuning-swift-base'
task = 'efficient-diffusion-tuning'
revision = 'v1.0.1'
model_dir = snapshot_download(model_id)
cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))
cfg_dict.model.inference = False
model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)
# init tuner
tuner_type = 'scetuning'  # or 'lora'
if tuner_type == 'lora':
    work_dir = 'tmp/multimodal_swift_lora_style'
    tuner_config = LoRAConfig(r=64, target_modules='.*unet.*.(to_q|to_k|to_v|to_out.0|net.0.proj|net.2)$')
    model = Swift.prepare_model(model, tuner_config)
elif tuner_type == 'scetuning':
    work_dir = 'tmp/multimodal_swift_scetuning_style'
    tuner_config = SCETuningConfig(
        dims=[1280, 1280, 1280, 1280, 1280, 640, 640, 640, 320, 320, 320, 320],
        target_modules=r'^unet\.up_blocks\.\d+\.resnets\.\d+$')
    model = Swift.prepare_model(model, tuner_config)
else:
    raise ValueError(f'Unsupported tuner type: {tuner_type}')
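# Optional sanity check, a minimal sketch assuming the prepared model behaves like a
# standard torch.nn.Module (which the SWIFT-wrapped model normally does): report how
# many parameters the tuner left trainable versus the frozen base.
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in model.parameters())
print(f'trainable params: {n_trainable}/{n_total} ({100 * n_trainable / n_total:.2f}%)')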
# training
def cfg_modify_fn(cfg):
    cfg.preprocessor.resolution = 512
    cfg.train.lr_scheduler = {'type': 'LambdaLR', 'lr_lambda': lambda _: 1, 'last_epoch': -1}
    cfg.train.max_epochs = 100
    cfg.train.optimizer.lr = 1e-4
    cfg.train.dataloader.batch_size_per_gpu = 10
    cfg.model.inference = False
    cfg.model.pretrained_tuner = None
    trainer_hook = cfg.train.hooks
    trainer_hook.append({'type': 'SwiftHook'})
    trainer_hook.append({'type': 'CheckpointHook', 'interval': 50})
    cfg.train.hooks = trainer_hook
    return cfg
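# Note: cfg_modify_fn is passed to build_trainer below and is applied to the config
# loaded from cfg_file when the trainer initializes, so these overrides take effect
# without editing configuration.json on disk.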
kwargs = dict(
    model=model,
    cfg_file=os.path.join(model_dir, 'configuration.json'),
    work_dir=work_dir,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
    cfg_modify_fn=cfg_modify_fn)
trainer = build_trainer(name='efficient-diffusion-tuning', default_args=kwargs)
trainer.train()
# inference: load the tuner weights saved during training under <work_dir>/output_swift
work_dir = os.path.join(work_dir, 'output_swift')
model_dir = snapshot_download(model_id)
cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))
cfg_dict.model.inference = True
model = Model.from_pretrained(model_id, cfg_dict=cfg_dict, revision=revision)
model = Swift.from_pretrained(model, work_dir)
pipe = pipeline(task='efficient-diffusion-tuning', model=model)
test_prompt = 'A boy in a camouflage jacket with a scarf'
img_out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=123)['output_imgs'][0]
cv2.imwrite(os.path.join(work_dir, 'inference.png'), img_out)
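# Optional: sweep a few seeds for a quick visual comparison; this reuses the exact
# pipeline call from above, only the loop and the output file names are new.
for seed in (0, 42, 2023):
    out = pipe({'prompt': test_prompt}, num_inference_steps=50, generator_seed=seed)['output_imgs'][0]
    cv2.imwrite(os.path.join(work_dir, f'inference_seed{seed}.png'), out)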
# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_controlnet

if __name__ == '__main__':
    infer_controlnet()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_controlnet_sdxl

if __name__ == '__main__':
    infer_controlnet_sdxl()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_dreambooth

if __name__ == '__main__':
    infer_dreambooth()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_dreambooth_lora

if __name__ == '__main__':
    infer_dreambooth_lora()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_dreambooth_lora_sdxl

if __name__ == '__main__':
    infer_dreambooth_lora_sdxl()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image

if __name__ == '__main__':
    infer_text_to_image()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image_lora

if __name__ == '__main__':
    infer_text_to_image_lora()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image_lora_sdxl

if __name__ == '__main__':
    infer_text_to_image_lora_sdxl()

# Copyright (c) Alibaba, Inc. and its affiliates.
from swift.aigc import infer_text_to_image_sdxl

if __name__ == '__main__':
    infer_text_to_image_sdxl()
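# The Python stubs above are thin CLI entry points from swift.aigc; the shell commands
# below show how each one is invoked (model IDs, checkpoint directories and output file
# names are example values).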
PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_controlnet.py \
  --base_model_path "AI-ModelScope/stable-diffusion-v1-5" \
  --controlnet_path "train_controlnet" \
  --prompt "pale golden rod circle with old lace background" \
  --control_image_path "conditioning_image_1.png" \
  --image_save_path "output.png" \
  --torch_dtype "fp16" \
  --seed 0

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_controlnet_sdxl.py \
  --base_model_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --controlnet_path "train_controlnet_sdxl" \
  --prompt "pale golden rod circle with old lace background" \
  --control_image_path "conditioning_image_1.png" \
  --image_save_path "output.png" \
  --torch_dtype "fp16" \
  --seed 0

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_dreambooth.py \
  --model_path "train_dreambooth" \
  --prompt "A photo of sks dog in a bucket" \
  --image_save_path "dog-bucket.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_dreambooth_lora.py \
  --base_model_path "AI-ModelScope/stable-diffusion-v1-5" \
  --lora_model_path "train_dreambooth_lora" \
  --prompt "A picture of a sks dog in a bucket" \
  --image_save_path "dog-bucket.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_dreambooth_lora_sdxl.py \
  --base_model_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --lora_model_path "train_dreambooth_lora_sdxl" \
  --prompt "A picture of a sks dog in a bucket" \
  --image_save_path "sks_dog.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-v1-5" \
  --unet_model_path "train_text_to_image/checkpoint-15000/unet" \
  --prompt "yoda" \
  --image_save_path "yoda-pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image_lora.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-v1-5" \
  --lora_model_path "train_text_to_image_lora/checkpoint-80000" \
  --prompt "A pokemon with green eyes and red legs." \
  --image_save_path "lora_pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image_lora_sdxl.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --lora_model_path "train_text_to_image_lora_sdxl/unet" \
  --prompt "A pokemon with green eyes and red legs." \
  --image_save_path "sdxl_lora_pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
CUDA_VISIBLE_DEVICES=0 \
python infer_text_to_image_sdxl.py \
  --pretrained_model_name_or_path "AI-ModelScope/stable-diffusion-xl-base-1.0" \
  --unet_model_path "train_text_to_image_sdxl/checkpoint-10000/unet" \
  --prompt "A pokemon with green eyes and red legs." \
  --image_save_path "sdxl_pokemon.png" \
  --torch_dtype "fp16"

PYTHONPATH=../../.. \
accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path="AI-ModelScope/stable-diffusion-v1-5" \
  --output_dir="train_controlnet" \
  --dataset_name="AI-ModelScope/controlnet_dataset_condition_fill50k" \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
  --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
  --train_batch_size=4