gaoqiong / MIGraphX · Commits

Commit 359d600c
Authored Nov 27, 2023 by Artur Wojcik

Merge branch 'windows_process_hiprtc_fix' into fix_jit_test

Parents: 512ddad7, 68324c86
Showing 9 changed files with 967 additions and 1 deletion:
examples/README.md                                                      +2   -1
examples/diffusion/README.md                                            +3   -0
examples/diffusion/python_stable_diffusion_21/README.md                 +67  -0
examples/diffusion/python_stable_diffusion_21/example_output.jpg        +0   -0
examples/diffusion/python_stable_diffusion_21/gradio_app.py             +54  -0
examples/diffusion/python_stable_diffusion_21/gradio_reqirements.txt    +25  -0
examples/diffusion/python_stable_diffusion_21/requirements.txt          +27  -0
examples/diffusion/python_stable_diffusion_21/sd21.ipynb                +532 -0
examples/diffusion/python_stable_diffusion_21/txt2img.py                +257 -0
examples/README.md

@@ -6,4 +6,5 @@ This directory contains examples of common use cases for MIGraphX.
 ## Examples:
 - [MIGraphX usage and utilities](./migraphx)
 - [Vision inference examples](./vision)
-- [Natural language inference examples](./nlp)
\ No newline at end of file
+- [Natural language inference examples](./nlp)
+- [Diffusion inference examples](./diffusion)
examples/diffusion/README.md
new file mode 100644

# Diffusion Inference Examples

- [Python Stable Diffusion 2.1](./python_stable_diffusion_21)
examples/diffusion/python_stable_diffusion_21/README.md
new file mode 100644
# Stable Diffusion 2.1

This version was tested with the [rocm 5.7](https://github.com/ROCmSoftwarePlatform/AMDMIGraphX/tree/rocm-5.7.0) revision.

## Jupyter notebook

There is a dedicated step-by-step notebook; see [sd21.ipynb](./sd21.ipynb).

## Console application

To run the console application, follow the steps below.

Set up the python environment:

```bash
# this requires the python venv to be installed (e.g. apt install python3.8-venv)
python3 -m venv sd_venv
. sd_venv/bin/activate
```

Install the dependencies:

```bash
pip install -r requirements.txt
```

Make the MIGraphX Python module visible to python:

```bash
export PYTHONPATH=/opt/rocm/lib:$PYTHONPATH
```
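To check that the module now resolves from that path, a quick optional sanity check (run inside the activated venv) is:

```bash
python3 -c "import migraphx; print(migraphx.__file__)"
```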
Get the models with optimum:

```bash
optimum-cli export onnx --model stabilityai/stable-diffusion-2-1 models/sd21-onnx
```

*Note: `models/sd21-onnx` will be used in the scripts.*
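The scripts only load three of the exported sub-models: `text_encoder`, `unet`, and `vae_decoder`, each from `models/sd21-onnx/<name>/model.onnx`. A quick way to confirm the export produced them (illustrative; optimum may export additional folders as well):

```bash
ls models/sd21-onnx/text_encoder models/sd21-onnx/unet models/sd21-onnx/vae_decoder
```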
Run the text-to-image script with the following example prompt and seed:

```bash
python txt2img.py --prompt "a photograph of an astronaut riding a horse" --seed 13 --output astro_horse.jpg
```

*Note: The first run will compile the models and cache them to make subsequent runs faster.*
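The cache is a compiled MIGraphX program saved next to each ONNX file with an `.mxr` extension (see `load_mgx_model` in `txt2img.py`). To check that the caches were written after the first run:

```bash
ls models/sd21-onnx/*/model.mxr
```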
The result should look like this:

## Gradio application

Note: this requires the `Console application` setup above to work.

Install the gradio dependencies:

```bash
pip install -r gradio_requirements.txt
```

Usage:

```bash
python gradio_app.py
```

This will load the models (which can take several minutes) and, once ready, start a server on `http://127.0.0.1:7860`.
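By default `demo.launch()` serves on localhost only. To reach the UI from another machine, gradio's standard launch parameters can be passed instead; a minimal sketch (the values here are illustrative):

```python
# in gradio_app.py, replacing the plain demo.launch()
demo.launch(server_name="0.0.0.0", server_port=7860)
```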
examples/diffusion/python_stable_diffusion_21/example_output.jpg
new file mode 100644 (binary image, 28.8 KB)
examples/diffusion/python_stable_diffusion_21/gradio_app.py
new file mode 100644
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
from txt2img import StableDiffusionMGX
import gradio as gr


def main():
    # Note: This will load the models, which can take several minutes
    sd = StableDiffusionMGX()

    def gr_wrapper(prompt, negative_prompt, steps, seed, scale):
        result = sd.run(str(prompt), str(negative_prompt), int(steps),
                        int(seed), float(scale))
        return StableDiffusionMGX.convert_to_rgb_image(result)

    demo = gr.Interface(
        gr_wrapper,
        [
            gr.Textbox(value="a photograph of an astronaut riding a horse",
                       label="Prompt"),
            gr.Textbox(value="", label="Negative prompt (Optional)"),
            gr.Slider(1, 100, step=1, value=20, label="Number of steps"),
            gr.Textbox(value=13, label="Random seed"),
            gr.Slider(1, 20, step=0.1, value=7.0, label="Guidance scale"),
        ],
        "image",
    )
    demo.launch()


if __name__ == "__main__":
    main()
examples/diffusion/python_stable_diffusion_21/gradio_reqirements.txt
new file mode 100644
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
-f requirements.txt
gradio
\ No newline at end of file
examples/diffusion/python_stable_diffusion_21/requirements.txt
new file mode 100644
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
accelerate
diffusers
optimum[onnxruntime]
transformers
\ No newline at end of file
examples/diffusion/python_stable_diffusion_21/sd21.ipynb
new file mode 100644
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# The MIT License (MIT)\n",
"#\n",
"# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.\n",
"#\n",
"# Permission is hereby granted, free of charge, to any person obtaining a copy\n",
"# of this software and associated documentation files (the 'Software'), to deal\n",
"# in the Software without restriction, including without limitation the rights\n",
"# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n",
"# copies of the Software, and to permit persons to whom the Software is\n",
"# furnished to do so, subject to the following conditions:\n",
"#\n",
"# The above copyright notice and this permission notice shall be included in\n",
"# all copies or substantial portions of the Software.\n",
"#\n",
"# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n",
"# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n",
"# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n",
"# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n",
"# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n",
"# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n",
"# THE SOFTWARE."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stable Diffusion 2.1\n",
"\n",
"The following example will show how to run `Stable Diffusion 2.1` with `MIGraphX`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Install the required dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install dependencies\n",
"!pip install optimum[onnxruntime] transformers diffusers accelerate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will use optimum to generate the onnx files."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# export models\n",
"!optimum-cli export onnx --model stabilityai/stable-diffusion-2-1 models/sd21-onnx"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now it is time to load these models with python.\n",
"\n",
"First, we make sure that MIGraphX module is found in the python path."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"mgx_lib_path = \"/opt/rocm/lib/\" # or \"/code/AMDMIGraphX/build/lib/\"\n",
"if mgx_lib_path not in sys.path:\n",
" sys.path.append(mgx_lib_path)\n",
"import migraphx as mgx"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, a helper method to load and cache the models.\n",
"\n",
"This will use the `models/sd21-onnx` path. If you changed it, make sure to update here as well."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# helper for model loading\n",
"def load_mgx_model(name, shapes):\n",
" file = f\"models/sd21-onnx/{name}/model\"\n",
" print(f\"Loading {name} model from {file}\")\n",
" if os.path.isfile(f\"{file}.mxr\"):\n",
" print(f\"Found mxr, loading it...\")\n",
" model = mgx.load(f\"{file}.mxr\", format=\"msgpack\")\n",
" elif os.path.isfile(f\"{file}.onnx\"):\n",
" print(f\"Parsing from onnx file...\")\n",
" model = mgx.parse_onnx(f\"{file}.onnx\", map_input_dims=shapes)\n",
" model.compile(mgx.get_target(\"gpu\"))\n",
" print(f\"Saving {name} model to mxr file...\")\n",
" mgx.save(model, f\"{file}.mxr\", format=\"msgpack\")\n",
" else:\n",
" print(f\"No {name} model found. Please verify the path is correct and re-try, or re-download model.\")\n",
" os.exit(1)\n",
" return model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With that, we can load the models. This could take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text_encoder = load_mgx_model(\"text_encoder\", {\"input_ids\": [1, 77]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"unet = load_mgx_model(\n",
" \"unet\", {\n",
" \"sample\": [1, 4, 64, 64],\n",
" \"encoder_hidden_states\": [1, 77, 1024],\n",
" \"timestep\": [1],\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vae = load_mgx_model(\"vae_decoder\", {\"latent_sample\": [1, 4, 64, 64]})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import the remaining packages."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from diffusers import EulerDiscreteScheduler\n",
"from transformers import CLIPTokenizer\n",
"import torch\n",
"import numpy as np\n",
"from tqdm.auto import tqdm\n",
"from PIL import Image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Time to load the scheduler and tokenizer from the original source."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_id = \"stabilityai/stable-diffusion-2-1\"\n",
"scheduler = EulerDiscreteScheduler.from_pretrained(model_id,\n",
" subfolder=\"scheduler\")\n",
"tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder=\"tokenizer\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we will define all the steps one by one, to make the last step short and simple.\n",
"\n",
"The first step will be to tokenize the user prompt. It will make a `(1, 77)` shaped `input_ids`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def tokenize(input):\n",
" return tokenizer([input],\n",
" padding=\"max_length\",\n",
" max_length=tokenizer.model_max_length,\n",
" truncation=True,\n",
" return_tensors=\"np\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"test_tk = tokenize(\"test tokenizer to see the tokens\")\n",
"test_tk.input_ids.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We run the tokenized prompt through the `Text Encoder` model. It expects the `(1, 77)` data as `int32`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"text_encoder.get_parameter_shapes()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_embeddings(input):\n",
" return np.array(\n",
" text_encoder.run({\"input_ids\": input.input_ids.astype(np.int32)\n",
" })[0]).astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"test_emb = get_embeddings(tokenize(\"test tokenizer to see the tokens\"))\n",
"test_emb.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The other input of the model is latent representation (pure noise). It will be transformed into a 512x512 image later.\n",
"The last input will be the timestep."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generate_latents(seed):\n",
" return torch.randn(\n",
" (1, 4, 64, 64),\n",
" generator=torch.manual_seed(seed),\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"test_latents = generate_latents(42)\n",
"latents.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we add two helpers to access and convert from torch to numpy with the proper datatype."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_scaled_sample(latents, t):\n",
" return scheduler.scale_model_input(latents, t).numpy().astype(np.float32)\n",
"\n",
"\n",
"def get_timestep(t):\n",
" return np.atleast_1d(t.numpy().astype(np.int64)) # convert 0D -> 1D"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The UNet model will be run in a loop. It will predict the noise residual."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"unet.get_parameter_shapes()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def denoise(sample, embeddings, timestep):\n",
" return np.array(\n",
" unet.run({\n",
" \"sample\": sample,\n",
" \"encoder_hidden_states\": embeddings,\n",
" \"timestep\": timestep\n",
" })[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Helpers to do the classifier-free guidance and computing the previous noisy sample."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def perform_guidance(noise_pred_uncond, noise_pred_text, scale):\n",
" return noise_pred_uncond + scale * (noise_pred_text - noise_pred_uncond)\n",
"\n",
"def compute_previous(noise_pred, t, latents):\n",
" # compute the previous noisy sample x_t -> x_t-1\n",
" return scheduler.step(noise_pred, t, latents).prev_sample\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Scale and decode the image latents with VAE."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def scale_denoised(latents):\n",
" return 1 / 0.18215 * latents\n",
"\n",
"\n",
"def decode(latents):\n",
" return np.array(\n",
" vae.run({\"latent_sample\": latents.numpy().astype(np.float32)})[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And lastly, we need to convert it to an image to display or save."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def convert_to_rgb_image(image):\n",
" image = np.clip(image / 2 + 0.5, 0, 1)\n",
" image = np.transpose(image, (0, 2, 3, 1))\n",
" images = (image * 255).round().astype(\"uint8\")\n",
" return Image.fromarray(images[0])\n",
"\n",
"def save_image(pil_image, filename=\"output.png\"):\n",
" pil_image.save(filename, format=\"png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Feel free to play around with these params."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"prompt = \"a photograph of an astronaut riding a horse\"\n",
"negative_prompt = \"\"\n",
"steps = 20\n",
"seed = 13\n",
"scale = 7.0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And now, to put everything together and run the whole pipeline:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scheduler.set_timesteps(steps)\n",
"\n",
"text_input, uncond_input = tokenize(prompt), tokenize(negative_prompt)\n",
"text_embeddings, uncond_embeddings = get_embeddings(\n",
" text_input), get_embeddings(uncond_input)\n",
"latents = generate_latents(seed) * scheduler.init_noise_sigma\n",
"\n",
"for t in tqdm(scheduler.timesteps):\n",
" sample = get_scaled_sample(latents, t)\n",
" timestep = get_timestep(t)\n",
"\n",
" noise_pred_uncond = denoise(sample, uncond_embeddings, timestep)\n",
" noise_pred_text = denoise(sample, text_embeddings, timestep)\n",
"\n",
" noise_pred = perform_guidance(noise_pred_uncond, noise_pred_text, scale)\n",
" latents = compute_previous(torch.from_numpy(noise_pred), t, latents)\n",
"\n",
"latents = scale_denoised(latents)\n",
"result = decode(latents)\n",
"image = convert_to_rgb_image(result)\n",
"\n",
"# show the image\n",
"image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you like the generated image, save it with the following:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_image(image, \"output.png\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sd_venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
examples/diffusion/python_stable_diffusion_21/txt2img.py
new file mode 100644
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the 'Software'), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from argparse import ArgumentParser
from diffusers import EulerDiscreteScheduler
from transformers import CLIPTokenizer
from PIL import Image
import migraphx as mgx
import numpy as np
import os
import sys
import torch
import time
from functools import wraps


# measurement helper
def measure(fn):
    @wraps(fn)
    def measure_ms(*args, **kwargs):
        start_time = time.perf_counter_ns()
        result = fn(*args, **kwargs)
        end_time = time.perf_counter_ns()
        print(f"Elapsed time: {(end_time - start_time) * 1e-6:.4f} ms\n")
        return result

    return measure_ms


def get_args():
    parser = ArgumentParser()
    parser.add_argument("-s", "--seed", type=int, default=42,
                        help="Random seed")
    parser.add_argument("-t", "--steps", type=int, default=20,
                        help="Number of steps")
    parser.add_argument("-p", "--prompt", type=str, required=True,
                        help="Prompt")
    parser.add_argument("-n", "--negative-prompt", type=str, default="",
                        help="Negative prompt")
    parser.add_argument("--scale", type=float, default=7.0,
                        help="Guidance scale")
    parser.add_argument("-o", "--output", type=str, default=None,
                        help="Output name")
    return parser.parse_args()


class StableDiffusionMGX():
    def __init__(self):
        model_id = "stabilityai/stable-diffusion-2-1"
        print(f"Using {model_id}")

        print("Creating EulerDiscreteScheduler scheduler")
        self.scheduler = EulerDiscreteScheduler.from_pretrained(
            model_id, subfolder="scheduler")

        print("Creating CLIPTokenizer tokenizer...")
        self.tokenizer = CLIPTokenizer.from_pretrained(model_id,
                                                       subfolder="tokenizer")

        print("Load models...")
        self.vae = StableDiffusionMGX.load_mgx_model(
            "vae_decoder", {"latent_sample": [1, 4, 64, 64]})
        self.text_encoder = StableDiffusionMGX.load_mgx_model(
            "text_encoder", {"input_ids": [1, 77]})
        self.unet = StableDiffusionMGX.load_mgx_model(
            "unet", {
                "sample": [1, 4, 64, 64],
                "encoder_hidden_states": [1, 77, 1024],
                "timestep": [1],
            })

    def run(self, prompt, negative_prompt, steps, seed, scale):
        # need to set this for each run
        self.scheduler.set_timesteps(steps)

        print("Tokenizing prompt...")
        text_input = self.tokenize(prompt)

        print("Creating text embeddings for prompt...")
        text_embeddings = self.get_embeddings(text_input)

        print("Tokenizing negative prompt...")
        uncond_input = self.tokenize(negative_prompt)

        print("Creating text embeddings for negative prompt...")
        uncond_embeddings = self.get_embeddings(uncond_input)

        print(f"Creating random input data (1x4x64x64) (latents) with seed={seed}...")
        latents = torch.randn((1, 4, 64, 64),
                              generator=torch.manual_seed(seed))

        print("Apply initial noise sigma\n")
        latents = latents * self.scheduler.init_noise_sigma

        print("Running denoising loop...")
        for step, t in enumerate(self.scheduler.timesteps):
            print(f"#{step}/{len(self.scheduler.timesteps)} step")
            latents = self.denoise_step(text_embeddings, uncond_embeddings,
                                        latents, t, scale)

        print("Scale denoised result...")
        latents = 1 / 0.18215 * latents

        print("Decode denoised result...")
        image = self.decode(latents)
        return image

    @staticmethod
    @measure
    def load_mgx_model(name, shapes):
        file = f"models/sd21-onnx/{name}/model"
        print(f"Loading {name} model from {file}")
        if os.path.isfile(f"{file}.mxr"):
            # a compiled program was cached by a previous run; load it directly
            print("Found mxr, loading it...")
            model = mgx.load(f"{file}.mxr", format="msgpack")
        elif os.path.isfile(f"{file}.onnx"):
            # first run: parse the onnx file, compile for the GPU, then cache
            print("Parsing from onnx file...")
            model = mgx.parse_onnx(f"{file}.onnx", map_input_dims=shapes)
            model.compile(mgx.get_target("gpu"))
            print(f"Saving {name} model to mxr file...")
            mgx.save(model, f"{file}.mxr", format="msgpack")
        else:
            print(f"No {name} model found. Please download it and re-try.")
            sys.exit(1)
        return model

    @measure
    def tokenize(self, input):
        return self.tokenizer([input],
                              padding="max_length",
                              max_length=self.tokenizer.model_max_length,
                              truncation=True,
                              return_tensors="np")

    @measure
    def get_embeddings(self, input):
        return np.array(
            self.text_encoder.run(
                {"input_ids": input.input_ids.astype(np.int32)})[0]).astype(
                    np.float32)

    @staticmethod
    def convert_to_rgb_image(image):
        image = np.clip(image / 2 + 0.5, 0, 1)
        image = np.transpose(image, (0, 2, 3, 1))
        images = (image * 255).round().astype("uint8")
        return Image.fromarray(images[0])

    @staticmethod
    def save_image(pil_image, filename="output.png"):
        pil_image.save(filename)

    @measure
    def denoise_step(self, text_embeddings, uncond_embeddings, latents, t,
                     scale):
        sample = self.scheduler.scale_model_input(
            latents, t).numpy().astype(np.float32)
        timestep = np.atleast_1d(t.numpy().astype(np.int64))  # convert 0D -> 1D

        noise_pred_uncond = np.array(
            self.unet.run({
                "sample": sample,
                "encoder_hidden_states": uncond_embeddings,
                "timestep": timestep
            })[0])
        noise_pred_text = np.array(
            self.unet.run({
                "sample": sample,
                "encoder_hidden_states": text_embeddings,
                "timestep": timestep
            })[0])

        # perform guidance
        noise_pred = noise_pred_uncond + scale * (noise_pred_text -
                                                  noise_pred_uncond)

        # compute the previous noisy sample x_t -> x_t-1
        return self.scheduler.step(torch.from_numpy(noise_pred), t,
                                   latents).prev_sample

    @measure
    def decode(self, latents):
        return np.array(
            self.vae.run(
                {"latent_sample": latents.numpy().astype(np.float32)})[0])


if __name__ == "__main__":
    args = get_args()

    sd = StableDiffusionMGX()
    result = sd.run(args.prompt, args.negative_prompt, args.steps, args.seed,
                    args.scale)

    print("Convert result to rgb image...")
    image = StableDiffusionMGX.convert_to_rgb_image(result)

    filename = args.output if args.output else f"output_s{args.seed}_t{args.steps}.png"
    StableDiffusionMGX.save_image(image, filename)
    print(f"Image saved to {filename}")
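For reference, an invocation that exercises every flag defined in `get_args()` above (the prompt and values here are illustrative):

```bash
python txt2img.py -p "a photograph of an astronaut riding a horse" \
    -n "blurry, low quality" -t 30 -s 42 --scale 7.5 -o astro_horse.jpg
```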