Commit 4a14635e authored by wangkaixiong

init

parent 7a12ad2a
.gitignore

*.png
DDIM_diffusers.py

# import tomesd
import torch
import time
import os
import pandas as pd
from diffusers import StableDiffusionPipeline, DDIMScheduler
#from xformers.ops import MemoryEfficientAttentionFlashAttentionOp, MemoryEfficientAttentionTritonFwdFlashBwOp
import torch._dynamo
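# fall back to eager execution instead of raising if torch.compile (optional, below) hits an unsupported op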
torch._dynamo.config.suppress_errors = True
ddim_sample_path = "./DDIM-sample"
os.makedirs(ddim_sample_path, exist_ok=True)
generator = torch.manual_seed(2024)
#model_id = "/diffusers-sd2/stable-diffusion-2-1-base"
model_id = "/data1/models/stablediffusion/stable-diffusion-2-1-base"
text_file = "PartiPrompts.tsv"
df = pd.read_csv(text_file, sep='\t')
prompts = df['Prompt']
num_inference_steps = 50
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
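# swap in DDIM sampling while reusing the scheduler config the pipeline was trained with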
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")
from DeepCache import DeepCacheSDHelper
helper = DeepCacheSDHelper(pipe=pipe)
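# DeepCache: with cache_interval=2 the full U-Net runs every other step and cached deep
# features are reused in between; cache_branch_id=0 caches at the shallowest skip branch,
# which gives the largest speedup (parameter semantics per the DeepCache README)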
helper.set_params(
    cache_interval=2,
    cache_branch_id=0,
)
helper.enable()
###################################
#pipe.unet = torch.compile(pipe.unet,mode="max-autotune-no-cudagraphs")
# pipe.vae = torch.compile(pipe.vae,mode="max-autotune-no-cudagraphs")
###################################
base_count = 0
print("======================================start DDIM ==================================")
for prompt in prompts:
start = time.time()
image = pipe(prompt, 512, 512, num_inference_steps=num_inference_steps, num_images_per_prompt=1, generator=generator).images[0]
print(f"the {base_count} text-to-image use time {time.time()-start}")
image.save(os.path.join(ddim_sample_path, f"{base_count:05}.png"))
base_count += 1
if base_count == 50:
break
print(f"Your samples are ready and waiting for you here\n{ddim_sample_path} \n"
f" \nEnjoy.")
DPM_diffusers.py

# import tomesd
import torch
import time
import os
import pandas as pd
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler, StableDiffusionXLControlNetPipeline, DiffusionPipeline
#from xformers.ops import MemoryEfficientAttentionFlashAttentionOp, MemoryEfficientAttentionTritonFwdFlashBwOp
# import torch._dynamo
# torch._dynamo.config.suppress_errors = True
# torch.backends.cuda.matmul.allow_tf32 = True
# torch.backends.cudnn.allow_tf32 = True
dpm_sample_path = "./DPM-sample"
os.makedirs(dpm_sample_path, exist_ok=True)
generator = torch.manual_seed(2024)
# model_id = "/data1/models/stablediffusion/stable-diffusion-xl-base-1.0"
model_id = "/data1/models/stablediffusion/stable-diffusion-2-1-base"
text_file = "PartiPrompts.tsv"
df = pd.read_csv(text_file, sep='\t')
prompts = df['Prompt']
num_inference_steps = 20
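# DPM-Solver++ (multistep) converges in far fewer steps than DDIM/Euler, hence 20 instead of 50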
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
# pipe = StableDiffusionXLControlNetPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
# pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")
from DeepCache import DeepCacheSDHelper
helper = DeepCacheSDHelper(pipe=pipe)
helper.set_params(
    cache_interval=2,
    cache_branch_id=0,
)
helper.enable()
###################################
#pipe.unet = torch.compile(pipe.unet,mode="max-autotune-no-cudagraphs")
# pipe.vae = torch.compile(pipe.vae,mode="max-autotune-no-cudagraphs")
###################################
base_count = 0
print("======================================start DPM ==================================")
for prompt in prompts:
start = time.time()
image = pipe(prompt, 512, 512, num_inference_steps=num_inference_steps, num_images_per_prompt=1, generator=generator).images[0]
# image = pipe(prompt).images[0]
print(f"the {base_count} text-to-image use time {time.time()-start}")
image.save(os.path.join(dpm_sample_path, f"{base_count:05}.png"))
base_count += 1
if base_count == 50:
break
print(f"Your samples are ready and waiting for you here\n{dpm_sample_path} \n"
f" \nEnjoy.")
EULER_diffusers.py

# import tomesd
import torch
import time
import os
import pandas as pd
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
#from xformers.ops import MemoryEfficientAttentionFlashAttentionOp, MemoryEfficientAttentionTritonFwdFlashBwOp
import torch._dynamo
torch._dynamo.config.suppress_errors = True
euler_sample_path = "./Euler-sample"
os.makedirs(euler_sample_path, exist_ok=True)
generator = torch.manual_seed(2024)
#model_id = "/diffusers-sd2/stable-diffusion-2-1-base"
model_id = "/data1/models/stablediffusion/stable-diffusion-2-1-base"
text_file = "PartiPrompts.tsv"
df = pd.read_csv(text_file, sep='\t')
prompts = df['Prompt']
num_inference_steps = 50
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")
from DeepCache import DeepCacheSDHelper
helper = DeepCacheSDHelper(pipe=pipe)
helper.set_params(
    cache_interval=2,
    cache_branch_id=0,
)
helper.enable()
###################################
#pipe.unet = torch.compile(pipe.unet,mode="max-autotune-no-cudagraphs")
#pipe.vae = torch.compile(pipe.vae,mode="max-autotune-no-cudagraphs")
###################################
base_count = 0
print("======================================start EULER ==================================")
for prompt in prompts:
start = time.time()
image = pipe(prompt, 512, 512, num_inference_steps=num_inference_steps, num_images_per_prompt=1, generator=generator).images[0]
print(f"the {base_count} text-to-image use time {time.time()-start}")
image.save(os.path.join(euler_sample_path, f"{base_count:05}.png"))
base_count += 1
if base_count == 50:
break
print(f"Your samples are ready and waiting for you here\n{euler_sample_path} \n"
f" \nEnjoy.")
README.md

## Environment setup

- pip uninstall torch -y
- pip uninstall torchvision -y
- pip install ./whl/torch-2.1.0+git93ce03f.abi0.dtk2404-cp310-cp310-manylinux2014_x86_64.whl -i https://pypi.doubanio.com/simple
- pip install ./whl/torchvision-0.16.0+das1.0+gitc9e7141.abi0.dtk2404.torch2.1-cp310-cp310-manylinux2014_x86_64.whl -i https://pypi.doubanio.com/simple
- pip install -r requirements.txt -i https://pypi.doubanio.com/simple

## Launch

- Copy open_clip_pytorch_model.bin into the current directory
- bash sd-run.sh
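- (Optional) After sampling, compute CLIP scores with the commands commented out at the end of sd-run.sh, e.g. `python ./clip-score.py --texts ./PartiPrompts.tsv --images DPM-sample --output DPM_all_scores`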
clip-score.py

import numpy as np
import pandas as pd
import torch
import sys
from tqdm import tqdm
import open_clip
import argparse, os
import torch.nn.functional as F
from PIL import Image
from torch.utils.data import Dataset
from transformers import CLIPTokenizer
class TextImagePairDataset_all(Dataset):
    """Pairs the PartiPrompts TSV with generated images named 00000.png, 00001.png, ..."""

    def __init__(self, text_file, image_dir, tokenizer, transform=None):
        self.image_dir = image_dir
        self.text_file = text_file
        self.tokenizer = tokenizer
        self.transform = transform
        df = pd.read_csv(text_file, sep='\t')
        # the sampling scripts may stop early (they break after 50 images), so only
        # keep the leading prompts whose image actually exists on disk
        self.image_paths = [os.path.join(image_dir, f"{i:05}.png") for i in range(len(df))]
        self.image_paths = [p for p in self.image_paths if os.path.exists(p)]
        self.prompts = df['Prompt'][:len(self.image_paths)]
        assert len(self.image_paths) == len(self.prompts), "The number of images and texts must be the same."

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image_path = self.image_paths[idx]
        text = self.prompts[idx]
        image = Image.open(image_path).convert('RGB')
        image = self.transform(image)
        tokens = self.tokenizer(text)
        return tokens, image
class TextImagePairDataset(Dataset):
    """Alternative pairing: one caption per line in a plain-text file, images listed from a directory."""

    def __init__(self, text_file, image_dir, tokenizer, transform=None):
        self.image_dir = image_dir
        self.text_file = text_file
        self.tokenizer = tokenizer
        self.transform = transform
        # sort so the i-th image lines up with the i-th caption
        self.image_paths = [os.path.join(image_dir, f) for f in sorted(os.listdir(image_dir))
                            if f.endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif'))]
        self.texts = []
        with open(text_file, 'r') as f:
            for line in f:
                self.texts.append(line.strip())
        assert len(self.image_paths) == len(self.texts), "The number of images and texts must be the same."

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image_path = self.image_paths[idx]
        text = self.texts[idx]
        image = Image.open(image_path).convert('RGB')
        image = self.transform(image)
        tokens = self.tokenizer(text)
        return tokens, image
def calculate_clip_score(texts_file, images_dir, batch_size, device, num_workers, output):
    model_clip, _, preprocess_clip = open_clip.create_model_and_transforms(
        'ViT-H-14', device=device, pretrained='laion2b_s32b_b79k')
    tokenizer = open_clip.get_tokenizer('ViT-H-14')
    dataset = TextImagePairDataset_all(texts_file, images_dir, tokenizer=tokenizer, transform=preprocess_clip)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
                                             drop_last=False, num_workers=num_workers)
    all_scores = []
    all_scores_cpu = []
    print(len(dataloader))
    for texts, imgs in tqdm(dataloader):
        # the tokenizer yields [1, 77] per sample, so batches arrive as [B, 1, 77]; flatten to [B, 77]
        texts = texts.reshape(-1, texts.shape[-1]).to(device)
        imgs = imgs.to(device)
        with torch.no_grad():
            img_fts = model_clip.encode_image(imgs)
            text_fts = model_clip.encode_text(texts)
        scores = F.cosine_similarity(img_fts, text_fts).squeeze()
        all_scores.append(scores)
    results_name = f"{output}.txt"
    if os.path.exists(results_name):
        os.remove(results_name)
        print("deleted old results")
    with open(results_name, 'a') as f:
        for score in all_scores:
            f.write(str(score.cpu().numpy()) + '\n')
            all_scores_cpu.append(score.cpu().numpy())
    average_score = np.mean(all_scores_cpu)
    return average_score
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--texts",
        type=str,
        nargs="?",
        default="./PartiPrompts.tsv",  # path to the prompts TSV
        # default="texts/text.txt",
    )
    parser.add_argument(
        "--images",
        type=str,
        nargs="?",
        default="./DPM-sample",  # directory containing the generated images
    )
    parser.add_argument(
        "--output",
        type=str,
        nargs="?",
        default="./DPM_all_scores",  # prefix for the per-image score file (<output>.txt)
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_workers", type=int, default=1)
    # default to None so the CUDA-availability fallback below can actually trigger
    parser.add_argument("--device", type=str, default=None)
    args = parser.parse_args()
    if args.device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(args.device)
    clip_score = calculate_clip_score(args.texts, args.images, args.batch_size, device, args.num_workers, args.output)
    print('CLIP-score: ', clip_score)


if __name__ == '__main__':
    main()
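# Example (matching the commented commands in sd-run.sh):
#   python ./clip-score.py --texts ./PartiPrompts.tsv --images DPM-sample --output DPM_all_scores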
requirements.txt

accelerate==0.28.0
certifi==2024.7.4
charset-normalizer==3.3.2
DeepCache==0.1.1
diffusers==0.29.2
filelock==3.15.4
fsspec==2024.6.1
ftfy==6.2.0
huggingface-hub==0.24.0
idna==3.7
importlib_metadata==8.0.0
Jinja2==3.1.4
MarkupSafe==2.1.5
mpmath==1.3.0
networkx==3.3
numpy==1.24.0
open-clip-torch==2.24.0
packaging==24.1
pandas==2.2.2
peft==0.9.0
pillow==10.4.0
protobuf==5.27.2
psutil==6.0.0
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.1
regex==2024.5.15
requests==2.32.3
safetensors==0.4.3
sentencepiece==0.2.0
six==1.16.0
sympy==1.13.0
timm==1.0.7
tokenizers==0.15.2
# torch and torchvision come from the local wheels (see README)
# torch
# torchvision
tqdm==4.66.4
transformers==4.38.1
typing_extensions==4.12.2
tzdata==2024.1
urllib3==2.2.2
wcwidth==0.2.13
zipp==3.19.2
sd-run.sh

export HIP_VISIBLE_DEVICES=0
unset ROCBLAS_TENSILE_LIBPATH
export LD_LIBRARY_PATH=rocblas-install-0513/lib:$LD_LIBRARY_PATH
# export MIOPEN_PRECISION_FP32_FP32_FP32_TF32_FP32=1

# Inference
# hipprof --hip-trace
python ./DPM_diffusers.py
python ./DDIM_diffusers.py
python ./EULER_diffusers.py

# CLIP score
# python ./clip-score.py --texts ./PartiPrompts.tsv --images DPM-sample --output DPM_all_scores
# python ./clip-score.py --texts ./PartiPrompts.tsv --images DDIM-sample --output DDIM_all_scores
# python ./clip-score.py --texts ./PartiPrompts.tsv --images Euler-sample --output EULER_all_scores