from diffusers import AutoencoderKL

import torch
from PIL import Image
from diffusers.image_processor import VaeImageProcessor

# VAE round-trip sanity check: encode one image into latent space, decode it
# straight back, and save the reconstruction to "test.png" for inspection.
device = torch.device("cuda:0")

vae = AutoencoderKL.from_pretrained(
    "/home/catvton_train/pretrained_models/stable-diffusion-inpainting/",
    subfolder="sd-vae-ft-mse",
)

# Module.to() updates the module in place, so the return value is not needed.
vae.to(device=device, dtype=torch.bfloat16)

vae_processor = VaeImageProcessor(vae_scale_factor=8)

img_path = "./cloth/08424_00.jpg"

# Force three channels so grayscale / palette / RGBA / CMYK inputs do not
# reach the VAE with an unexpected channel count. convert() loads the pixel
# data and returns a new image, so the file handle can be released right away.
with Image.open(img_path) as src:
    image = src.convert("RGB")

# preprocess(image, height, width) already returns a batched tensor, so the
# batch dimension does not have to be stripped and re-added by hand.
image = vae_processor.preprocess(image, 512, 384)

with torch.no_grad():
    # NOTE: sample() draws from the posterior, so repeated runs will not be
    # bit-identical.
    image_latent = vae.encode(
        image.to(device=device, dtype=vae.dtype)
    ).latent_dist.sample()

    # Scale into the latent range a diffusion model would operate on ...
    image_latent = image_latent * vae.config.scaling_factor

    # ... and undo that scaling again before handing the latent to the decoder.
    image_latent = image_latent / vae.config.scaling_factor

    image = vae.decode(image_latent).sample

# Upcast before de-normalising: adding 0.5 in bfloat16 throws away mantissa
# bits and can shift the final 8-bit pixel values by a level.
image = image.float()

# Map the decoder output back to [0, 1] (inverse of the "/ 2 + 0.5" convention
# used for normalised image tensors).
image = (image / 2 + 0.5).clamp(0, 1)

# NCHW -> NHWC on the host, then keep the single image of the batch.
image = image.permute(0, 2, 3, 1).cpu().numpy()
image = image[0]

image = (image * 255).round().astype("uint8")

image = Image.fromarray(image)

image.save("test.png")