{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "411c59b3-f177-4a10-8925-d931ce572eaa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import torch\n",
    "from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, DDIMScheduler, AutoencoderKL\n",
    "from PIL import Image\n",
    "\n",
    "from ip_adapter import IPAdapter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b6dc69c-192d-4d74-8b1e-f0d9ccfbdb49",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All checkpoint locations are built from the current working directory,\n",
    "# which is printed so the resolved root is visible in the cell output.\n",
    "current_dir = os.getcwd()\n",
    "print(current_dir)\n",
    "\n",
    "base_model_path = f\"{current_dir}/pretrained_models/sd1.5/Realistic_Vision_v4.0_noVAE\"\n",
    "vae_model_path = f\"{current_dir}/pretrained_models/sd1.5/sd-vae-ft-mse\"\n",
    "image_encoder_path = f\"{current_dir}/pretrained_models/models/image_encoder/\"\n",
    "ip_ckpt = f\"{current_dir}/pretrained_models/models/ip-adapter_sd15.safetensors\"\n",
    "device = \"cuda\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63ec542f-8474-4f38-9457-073425578073",
   "metadata": {},
   "outputs": [],
   "source": [
    "def image_grid(imgs, rows, cols):\n",
    "    \"\"\"Paste ``imgs`` into one ``rows`` x ``cols`` grid image, filling row by row.\n",
    "\n",
    "    Args:\n",
    "        imgs: sequence of PIL images; the cell size is taken from ``imgs[0]``.\n",
    "        rows: number of rows in the grid.\n",
    "        cols: number of columns in the grid.\n",
    "\n",
    "    Returns:\n",
    "        A new RGB image of size ``(cols * w, rows * h)``.\n",
    "\n",
    "    Raises:\n",
    "        AssertionError: if ``len(imgs)`` is not ``rows * cols``.\n",
    "    \"\"\"\n",
    "    assert len(imgs) == rows * cols, f'expected {rows * cols} images, got {len(imgs)}'\n",
    "\n",
    "    w, h = imgs[0].size\n",
    "    grid = Image.new('RGB', size=(cols * w, rows * h))\n",
    "\n",
    "    for i, img in enumerate(imgs):\n",
    "        # column index is i % cols, row index is i // cols\n",
    "        grid.paste(img, box=(i % cols * w, i // cols * h))\n",
    "    return grid\n",
    "\n",
    "# Scheduler instance handed to each pipeline constructed later in the notebook.\n",
    "noise_scheduler = DDIMScheduler(\n",
    "    num_train_timesteps=1000,\n",
    "    beta_start=0.00085,\n",
    "    beta_end=0.012,\n",
    "    beta_schedule=\"scaled_linear\",\n",
    "    clip_sample=False,\n",
    "    set_alpha_to_one=False,\n",
    "    steps_offset=1,\n",
    ")\n",
    "# Load the VAE from its own checkpoint directory and cast it to float16.\n",
    "vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8081d92-8f42-4bcd-9f83-44aec3f549a9",
   "metadata": {},
   "source": [
    "## Image Variations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3849f9d0-5f68-4a49-9190-69dd50720cae",
   "metadata": {},
"outputs": [],
   "source": [
    "# Build the base Stable Diffusion pipeline, reusing the scheduler and VAE\n",
    "# objects created earlier; feature extractor and safety checker are passed as None.\n",
    "pipe = StableDiffusionPipeline.from_pretrained(\n",
    "    base_model_path,\n",
    "    vae=vae,\n",
    "    scheduler=noise_scheduler,\n",
    "    torch_dtype=torch.float16,\n",
    "    safety_checker=None,\n",
    "    feature_extractor=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec09e937-3904-4d8e-a559-9066502ded36",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the picture used as the image prompt. The trailing expression shows a\n",
    "# 256x256 preview; `image` itself is not reassigned.\n",
    "image = Image.open(\"assets/images/woman.png\")\n",
    "image.resize((256, 256))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81b1ab06-d3ed-4a7e-a356-9ddf1a2eecd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the pipeline with IP-Adapter (image encoder + adapter checkpoint).\n",
    "ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b77f52de-a9e4-44e1-aeec-8165414f1273",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample four variations of the image prompt and show them side by side.\n",
    "images = ip_model.generate(\n",
    "    pil_image=image,\n",
    "    num_samples=4,\n",
    "    num_inference_steps=50,\n",
    "    seed=42,\n",
    ")\n",
    "grid = image_grid(images, rows=1, cols=4)\n",
    "grid"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf199405-7cb5-4f78-9973-5fe51c632a41",
   "metadata": {},
   "source": [
    "## Image-to-Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f089ad0-4683-46d7-ab58-9e5fe8f34c67",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the previous pipeline and adapter, clear the CUDA cache, then build\n",
    "# the Img2Img pipeline from the same checkpoint, scheduler and VAE.\n",
    "del ip_model\n",
    "del pipe\n",
    "torch.cuda.empty_cache()\n",
    "pipe = StableDiffusionImg2ImgPipeline.from_pretrained(\n",
    "    base_model_path,\n",
    "    vae=vae,\n",
    "    scheduler=noise_scheduler,\n",
    "    torch_dtype=torch.float16,\n",
    "    safety_checker=None,\n",
    "    feature_extractor=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8db2b55-2f56-4eef-b2ca-c5126b14feb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the image prompt and the guide image, then preview both at 256x256.\n",
    "image = Image.open(\"assets/images/river.png\")\n",
    "g_image = Image.open(\"assets/images/vermeer.jpg\")\n",
    "image_grid([picture.resize((256, 256)) for picture in (image, g_image)], rows=1, cols=2)"
   ]
  },
  {
"cell_type": "code",
   "execution_count": null,
   "id": "a501f284-f295-4673-96ab-e34378da62ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the Img2Img pipeline with IP-Adapter.\n",
    "ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f58fff74-9ff2-46e6-bc8a-2ad4ae1fbe0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate four samples: `image` is the image prompt, `g_image` is passed as\n",
    "# the pipeline's input image together with the strength setting.\n",
    "images = ip_model.generate(\n",
    "    pil_image=image,\n",
    "    num_samples=4,\n",
    "    num_inference_steps=50,\n",
    "    seed=42,\n",
    "    image=g_image,\n",
    "    strength=0.6,\n",
    ")\n",
    "grid = image_grid(images, rows=1, cols=4)\n",
    "grid"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "420a7c45-8697-411f-8374-3c81d5d972e3",
   "metadata": {},
   "source": [
    "## Inpainting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "385cb339-3326-4523-a7db-b09e62d39c80",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the previous pipeline and adapter, clear the CUDA cache, then build\n",
    "# the legacy inpainting pipeline from the same checkpoint, scheduler and VAE.\n",
    "del ip_model\n",
    "del pipe\n",
    "torch.cuda.empty_cache()\n",
    "pipe = StableDiffusionInpaintPipelineLegacy.from_pretrained(\n",
    "    base_model_path,\n",
    "    vae=vae,\n",
    "    scheduler=noise_scheduler,\n",
    "    torch_dtype=torch.float16,\n",
    "    safety_checker=None,\n",
    "    feature_extractor=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c47f8ce5-eed0-41ef-9dbb-2272ec4bc224",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the picture used as the image prompt; the trailing expression shows a\n",
    "# 256x256 preview without reassigning `image`.\n",
    "image = Image.open(\"assets/images/girl.png\")\n",
    "image.resize((256, 256))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9b77289-65f5-459b-ada5-5c7c265bb4a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the picture to inpaint and its mask, both resized to 512x768, and\n",
    "# preview them next to each other at 256x384.\n",
    "masked_image = Image.open(\"assets/inpainting/image.png\").resize((512, 768))\n",
    "mask = Image.open(\"assets/inpainting/mask.png\").resize((512, 768))\n",
    "image_grid([picture.resize((256, 384)) for picture in (masked_image, mask)], rows=1, cols=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e49dbdaa-58eb-4bcf-acab-fa5e08f96dcb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the inpainting pipeline with IP-Adapter.\n",
    "ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device)"
   ]
  },
  {
"cell_type": "code",
   "execution_count": null,
   "id": "945f6800-18b8-4d95-9f5e-e7035166cbbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate four inpainted samples: `image` is the image prompt, while\n",
    "# `masked_image` and `mask` are passed as the pipeline's image and mask inputs.\n",
    "images = ip_model.generate(\n",
    "    pil_image=image,\n",
    "    num_samples=4,\n",
    "    num_inference_steps=50,\n",
    "    seed=42,\n",
    "    image=masked_image,\n",
    "    mask_image=mask,\n",
    "    strength=0.7,\n",
    ")\n",
    "grid = image_grid(images, rows=1, cols=4)\n",
    "grid"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}