"""Round-trip one image through a pretrained VAE and save the reconstruction.

The script loads an ``AutoencoderKL`` checkpoint, encodes a single garment
image to latents, decodes the latents back to pixel space, and writes the
result to ``test.png`` so the reconstruction can be inspected by eye.
"""

import torch
from diffusers import AutoencoderKL
from diffusers.image_processor import VaeImageProcessor
from PIL import Image

device = torch.device("cuda:0")

vae = AutoencoderKL.from_pretrained(
    "/home/catvton_train/pretrained_models/stable-diffusion-inpainting/",
    subfolder="sd-vae-ft-mse",
)
vae.to(device).to(torch.bfloat16)
vae_processor = VaeImageProcessor(vae_scale_factor=8)

img_path = "./cloth/08424_00.jpg"
# Close the file handle promptly; convert() loads the pixel data and returns a
# detached copy, and forces three channels for grayscale/CMYK/RGBA inputs.
with Image.open(img_path) as source_image:
    image = source_image.convert("RGB")

# The two positional arguments are passed through unchanged from the original
# script (presumably height=512, width=384 -- confirm against the installed
# diffusers version). Taking item 0 and re-adding the batch axis keeps exactly
# one sample in the batch.
pixel_values = vae_processor.preprocess(image, 512, 384)[0].unsqueeze(0)

# Inference only: without no_grad the decoded tensor would track gradients and
# the later .numpy() call would fail.
with torch.no_grad():
    # NOTE: sample() draws from the posterior, so repeated runs produce
    # slightly different reconstructions.
    image_latent = vae.encode(
        pixel_values.to(device).to(vae.dtype)
    ).latent_dist.sample()
    # Scale, then undo the scaling before decoding. The two steps cancel out
    # up to bfloat16 rounding and are kept to match the original script.
    image_latent = image_latent * vae.config.scaling_factor
    image_latent = image_latent * (1 / vae.config.scaling_factor)
    decoded = vae.decode(image_latent).sample

# Shift/scale by (x / 2 + 0.5) and clamp to [0, 1] before quantising.
decoded = (decoded / 2 + 0.5).clamp(0, 1)
# Move the channel axis last, and upcast to float32 because NumPy has no
# bfloat16 dtype.
pixels = decoded.permute(0, 2, 3, 1).cpu().float().numpy()[0]
pixels = (pixels * 255).round().astype("uint8")

image = Image.fromarray(pixels)
image.save("test.png")