{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "411c59b3-f177-4a10-8925-d931ce572eaa", "metadata": {}, "outputs": [], "source": [ "import torch\n", "from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, DDIMScheduler, AutoencoderKL\n", "from PIL import Image\n", "\n", "from ip_adapter import IPAdapterFull" ] }, { "cell_type": "code", "execution_count": 2, "id": "6b6dc69c-192d-4d74-8b1e-f0d9ccfbdb49", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/modelzoo/IP-Adapter\n" ] } ], "source": [ "import os\n", "\n", "current_dir = os.getcwd()\n", "print(current_dir)\n", "\n", "base_model_path = f\"{current_dir}/pretrained_models/sd1.5/Realistic_Vision_v4.0_noVAE\"\n", "vae_model_path = f\"{current_dir}/pretrained_models/sd1.5/sd-vae-ft-mse\"\n", "image_encoder_path = f\"{current_dir}/pretrained_models/models/image_encoder/\"\n", "ip_ckpt = f\"{current_dir}/pretrained_models/models/ip-adapter-full-face_sd15.safetensors\"\n", "device = \"cuda\"" ] }, { "cell_type": "code", "execution_count": 3, "id": "63ec542f-8474-4f38-9457-073425578073", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "An error occurred while trying to fetch /home/modelzoo/IP-Adapter/pretrained_models/sd1.5/sd-vae-ft-mse: Error no file named diffusion_pytorch_model.safetensors found in directory /home/modelzoo/IP-Adapter/pretrained_models/sd1.5/sd-vae-ft-mse.\n", "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. 
  { "cell_type": "code", "execution_count": 3, "id": "63ec542f-8474-4f38-9457-073425578073", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "An error occurred while trying to fetch /home/modelzoo/IP-Adapter/pretrained_models/sd1.5/sd-vae-ft-mse: Error no file named diffusion_pytorch_model.safetensors found in directory /home/modelzoo/IP-Adapter/pretrained_models/sd1.5/sd-vae-ft-mse.\n", "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", "  return self.fget.__get__(instance, owner)()\n" ] } ], "source": [ "def image_grid(imgs, rows, cols):\n", "    # paste rows*cols equally sized images into a single grid image\n", "    assert len(imgs) == rows*cols\n", "\n", "    w, h = imgs[0].size\n", "    grid = Image.new('RGB', size=(cols*w, rows*h))\n", "\n", "    for i, img in enumerate(imgs):\n", "        grid.paste(img, box=(i%cols*w, i//cols*h))\n", "    return grid\n", "\n", "# DDIM scheduler with the sampling settings used by the IP-Adapter demos\n", "noise_scheduler = DDIMScheduler(\n", "    num_train_timesteps=1000,\n", "    beta_start=0.00085,\n", "    beta_end=0.012,\n", "    beta_schedule=\"scaled_linear\",\n", "    clip_sample=False,\n", "    set_alpha_to_one=False,\n", "    steps_offset=1,\n", ")\n", "# fine-tuned MSE VAE, loaded in fp16\n", "vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)" ] },
  { "cell_type": "code", "execution_count": 4, "id": "3849f9d0-5f68-4a49-9190-69dd50720cae", "metadata": {}, "outputs": [], "source": [ "# load the SD 1.5 text-to-image pipeline with the custom scheduler and VAE\n", "pipe = StableDiffusionPipeline.from_pretrained(\n", "    base_model_path,\n", "    torch_dtype=torch.float16,\n", "    scheduler=noise_scheduler,\n", "    vae=vae,\n", "    feature_extractor=None,\n", "    safety_checker=None\n", ")" ] },
  { "cell_type": "code", "execution_count": 5, "id": "7a8b9c0d-1e2f-4a3b-8c4d-5e6f7a8b9c0d", "metadata": {}, "outputs": [], "source": [ "# load ip-adapter; the full-face checkpoint uses 257 image tokens,\n", "# as in the reference full-face demo\n", "ip_model = IPAdapterFull(pipe, image_encoder_path, ip_ckpt, device, num_tokens=257)" ] },
  { "cell_type": "code", "execution_count": 6, "id": "2b3c4d5e-6f7a-4b8c-9d0e-1f2a3b4c5d6e", "metadata": {}, "outputs": [], "source": [ "# read the face image used as the image prompt\n", "image = Image.open(\"assets/images/ai_face2.png\")\n", "image.resize((256, 256))" ] },
  { "cell_type": "code", "execution_count": 7, "id": "70e77d65-262f-415f-9cbd-057d57c4222d", "metadata": {}, "outputs": [], "source": [ "# use the face as image prompt; scale weights the image prompt against the text prompt\n", "images = ip_model.generate(\n", "    pil_image=image, num_samples=4, prompt=\"A photo of a girl wearing a black dress, holding red roses in hand, upper body, behind is the Eiffel Tower\",\n", "    scale=0.7, width=512, height=704, num_inference_steps=50, seed=42)\n", "grid = image_grid(images, 1, 4)\n", "grid" ] },
  { "cell_type": "code", "execution_count": 8, "id": "d83df45f-717d-4bb3-a5fd-0ea30930a431", "metadata": {}, "outputs": [], "source": [ "# use a lower scale so the reference face blends with the text-described subject\n", "images = ip_model.generate(\n", "    pil_image=image, num_samples=4, prompt=\"photo of Einstein wearing colorful casual shirt in a garden\",\n", "    scale=0.4, width=512, height=704, num_inference_steps=50, seed=42)\n", "grid = image_grid(images, 1, 4)\n", "grid" ] },
  { "cell_type": "code", "execution_count": 10, "id": "10d6359e-6eb3-432a-a890-b814c505d005", "metadata": {}, "outputs": [], "source": [ "# (assumed reconstruction) re-create the pipeline before the final run; this cell's\n", "# recorded output shows \"Loading pipeline components\", so it loaded a\n", "# StableDiffusionPipeline with the same checkpoints as above\n", "pipe = StableDiffusionPipeline.from_pretrained(\n", "    base_model_path,\n", "    torch_dtype=torch.float16,\n", "    scheduler=noise_scheduler,\n", "    vae=vae,\n", "    feature_extractor=None,\n", "    safety_checker=None\n", ")" ] },
  { "cell_type": "code", "execution_count": 11, "id": "3c4d5e6f-7a8b-4c9d-0e1f-2a3b4c5d6e7f", "metadata": {}, "outputs": [], "source": [ "# re-attach the IP-Adapter to the reloaded pipeline (assumed: same checkpoint as above)\n", "ip_model = IPAdapterFull(pipe, image_encoder_path, ip_ckpt, device, num_tokens=257)" ] },
  { "cell_type": "code", "execution_count": 12, "id": "4d5e6f7a-8b9c-4d0e-1f2a-3b4c5d6e7f8a", "metadata": {}, "outputs": [], "source": [ "# same face prompt as before, with a slightly lower image-prompt scale\n", "images = ip_model.generate(\n", "    pil_image=image, num_samples=4, prompt=\"A photo of a girl wearing a black dress, holding red roses in hand, upper body, behind is the Eiffel Tower\",\n", "    scale=0.6, width=512, height=704, num_inference_steps=50, seed=42)\n", "grid = image_grid(images, 1, 4)\n", "grid" ] },
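  { "cell_type": "markdown", "id": "8e9f0a1b-2c3d-4e5f-6a7b-8c9d0e1f2a3b", "metadata": {}, "source": [ "As a closing sketch (the `outputs/` directory name is an assumption, not part of the original demo): `generate` returns a list of PIL images and `image_grid` returns a single PIL image, so both can be written to disk with plain `save` calls." ] },
  { "cell_type": "code", "execution_count": null, "id": "0a1b2c3d-4e5f-4a6b-8c7d-9e0f1a2b3c4d", "metadata": {}, "outputs": [], "source": [ "# Persist the last batch and its grid; `outputs/` is an assumed location.\n", "os.makedirs(\"outputs\", exist_ok=True)\n", "for i, img in enumerate(images):\n", "    img.save(f\"outputs/face_scale06_{i}.png\")\n", "grid.save(\"outputs/face_scale06_grid.png\")" ] }
 ],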
"metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }