Commit e0292b6e authored by wangwf

support flux.1-dev inference

parent 97f17a79
[
{
"theme": "Nature scenery",
"theme_zh": "自然风景",
"examples": [
{
"prompt": "Epic rainbow waterfall cascading through emerald jungle, vibrant mist refracting sunlight, hyperrealistic 8K, cinematic lighting, National Geographic photography, depth of field",
"negative_prompt": "blurry, cartoon, drawing, anime, text, people, buildings, artificial colors, overexposed"
},
{
"prompt": "Tranquil cherry blossom river in moonlight, petals floating on water, soft bokeh, Studio Ghibli style, dreamy pastel colors, matte painting",
"negative_prompt": "sharp lines, photorealistic, modern city, ugliness, blood, fire, broken trees, daytime"
},
{
"prompt": "Aurora borealis over glacial icebergs, mirror-like water reflection, cosmic color palette, long exposure photography, 16K resolution",
"negative_prompt": "daylight, desert, buildings, people, cartoon style, text, blurry edges"
},
{
"prompt": "Volcanic eruption at twilight, lava rivers through obsidian fields, ash clouds illuminated by lightning, dramatic chiaroscuro, concept art",
"negative_prompt": "peaceful scene, snow, vegetation, cartoonish, bright colors, humans, watercolor"
},
{
"prompt": "Surreal desert with floating crystal formations, double suns setting, heat haze distortion, Roger Dean landscape style",
"negative_prompt": "rainforest, ocean, modern structures, animals, night scene, sketch lines"
}
]
},
{
"theme": "Human portrait",
"theme_zh": "人物与肖像",
"examples": [
{
"prompt": "Close-up portrait of a cyborg girl with neon circuit tattoos, holographic blue dreadlocks, reflective rain-soaked skin, cyberpunk aesthetic, cinematic volumetric lighting, art by WLOP",
"negative_prompt": "deformed hands, extra fingers, makeup, traditional clothing, natural hair, smile, hat, watermark, signature"
},
{
"prompt": "Ancient warrior queen in obsidian armor, scar across cheek, dramatic sunset backlight, oil painting style, detailed armor texture, fierce gaze",
"negative_prompt": "cute, modern, glasses, jewelry, high heels, cartoon, blurry background, flowers"
},
{
"prompt": "Renaissance alchemist in candlelit laboratory, holding glowing flask, intricate velvet robes, Caravaggio lighting style",
"negative_prompt": "casual clothes, technology, smile, clean background, sunglasses, photograph"
},
{
"prompt": "Tribal elder with weathered face and feather headdress, golden hour lighting, environmental portrait, National Geographic style",
"negative_prompt": "youth, modern clothing, studio lighting, cybernetics, weapons, cartoon eyes"
},
{
"prompt": "Steampunk inventor with mechanical arm, goggles on forehead, surrounded by brass gadgets, detailed etching illustration",
"negative_prompt": "casual wear, natural limbs, minimalism, futuristic tech, smile, anime style"
}
]
},
{
"theme": "Science fiction and Fantasy",
"theme_zh": "科幻与奇幻",
"examples": [
{
"prompt": "Floating neon city above clouds, holographic billboards, flying cars in rain, Blade Runner 2099, cinematic ultra-wide shot, hyperdetailed, cyberpunk color palette",
"negative_prompt": "medieval, trees, daylight, desert, historical, sketch, low quality, text"
},
{
"prompt": "Crystal cave with bioluminescent fungi, alien plants pulsing light, mysterious portal glowing purple, fantasy concept art, James Gurney style",
"negative_prompt": "technology, robots, humans, sharp edges, modern, cartoon, sunshine"
},
{
"prompt": "Dyson sphere construction around red giant star, swarm robots assembling panels, cosmic scale, sci-fi book cover art",
"negative_prompt": "earth landscape, organic shapes, primitive technology, daytime, hand drawn"
},
{
"prompt": "Elven tree city at dawn, wooden bridges between giant mushrooms, glowing runes, Alan Lee illustration style",
"negative_prompt": "machines, pollution, concrete, modern clothes, guns, desert"
},
{
"prompt": "Time traveler's pocket watch portal, gears floating in spacetime rift, steampunk metaphysics, detailed 3D render",
"negative_prompt": "simple design, natural landscape, cartoon, blood, modern electronics"
}
]
},
{
"theme": "Creature",
"theme_zh": "生物",
"examples": [
{
"prompt": "T-rex with exposed hydraulic muscles and steel plating, roaring in volcanic wasteland, dieselpunk aesthetic, highly detailed scales, dramatic smoke, Simon Stålenhag style",
"negative_prompt": "feathers, natural skin, jungle, peaceful, cute, baby animal, watercolor, blurry"
},
{
"prompt": "Phoenix rising from molten lava, iridescent gold feathers, particle effects, epic fantasy illustration, vibrant fire glow, high contrast",
"negative_prompt": "robot, mechanical, wings, cartoon, pixel art, blood, realistic eagle"
},
{
"prompt": "Mutated deep-sea anglerfish with crystalline teeth, bioluminescent lure, abyssal trench environment, cinematic underwater shot",
"negative_prompt": "land animal, daylight, fur, cute, cartoon eyes, jungle, flying"
},
{
"prompt": "Mechanical hummingbird with clockwork wings, hovering over steam flowers, intricate gear details, steampunk macro photography",
"negative_prompt": "natural feathers, dull colors, stationary, blood, destruction, human scale"
},
{
"prompt": "Forest guardian spirit made of living wood and glowing moss, ancient tree face, fantasy creature design",
"negative_prompt": "robotic parts, metal, urban setting, aggressive pose, fire, sharp teeth"
}
]
},
{
"theme": "Architecture and Space",
"theme_zh": "建筑与空间",
"examples": [
{
"prompt": "Infinite library floating among clouds, spiral bookshelves under glass dome, sunbeams through stained glass, Baroque architecture, magical realism, unreal engine render",
"negative_prompt": "modern design, concrete, people, decay, darkness, Asian style, wood, minimalist"
},
{
"prompt": "Abandoned overgrown subway station, vines swallowing pillars, sunlight through broken ceiling, moss-covered tracks, photorealistic, haunting atmosphere",
"negative_prompt": "clean, futuristic, crowds, bright colors, fantasy creatures, neon lights"
},
{
"prompt": "Bamboo skyscraper with vertical gardens, sustainable futuristic city, daylight aerial view, eco-architecture concept",
"negative_prompt": "ruins, traditional buildings, desert, night, pollution, Gothic style"
},
{
"prompt": "Interdimensional train station with floating platforms, Art Deco design, travelers in vintage suits, Moebius art style",
"negative_prompt": "modern clothing, cars, daylight, destruction, wood material, medieval"
},
{
"prompt": "Submerged Atlantis ruins with coral-covered columns, sunken treasure glow, underwater volumetric rays, fantasy archaeology",
"negative_prompt": "modern structures, people, dry land, bright lighting, technology, cartoon fish"
}
]
},
{
"theme": "Abstraction and Art",
"theme_zh": "抽象与艺术",
"examples": [
{
"prompt": "Abstract explosion of liquid gold and deep blue, metallic fluid dynamics, emotional turbulence, 3D render, luxury aesthetic, motion blur background",
"negative_prompt": "objects, human forms, text, sharp edges, cartoon, flowers, faces, landscape"
},
{
"prompt": "Geometric fractals in iridescent colors, quantum foam texture, glowing dark matter, digital art, 8K wallpaper, trippy",
"negative_prompt": "realistic, photo, animals, people, buildings, simple shapes, dull colors"
},
{
"prompt": "Van Gogh starry night reinterpreted with neural network patterns, swirling digital brushstrokes, post-impressionist algorithm art",
"negative_prompt": "photorealism, sharp lines, solid colors, text, human figures, architecture"
},
{
"prompt": "Kinetic sculpture of floating chrome spheres, light refraction patterns, minimalist abstract, museum installation",
"negative_prompt": "organic shapes, textures, landscapes, people, bright colors, cartoon shading"
},
{
"prompt": "Symphony visualized as interwoven color ribbons, musical notes transforming into light particles, synesthesia art",
"negative_prompt": "recognizable objects, faces, buildings, dark palette, text, photorealistic"
}
]
},
{
"theme": "Daily life",
"theme_zh": "日常生活",
"examples": [
{
"prompt": "Cozy rainy Paris café interior, warm lamp light on books, steaming latte, blurred raindrops on window, vintage filter, atmospheric perspective",
"negative_prompt": "sunny, crowded, modern design, bright colors, empty cups, dirty, anime style"
},
{
"prompt": "Macro shot of matcha cake with red bean filling, powdered sugar dusting, food photography, shallow depth of field, natural light",
"negative_prompt": "burnt, sliced, fork, human hands, ugly plate, synthetic colors, text"
},
{
"prompt": "Vintage record store at twilight, neon sign reflection on wet pavement, vinyl records glowing softly, cinematic ambiance",
"negative_prompt": "daylight, empty shelves, modern streaming devices, people fighting, cartoon style"
},
{
"prompt": "Minimalist Japanese breakfast arrangement, miso soup steam rising, morning light through shoji screen, film photography grain",
"negative_prompt": "messy table, Western food, dinner scene, artificial lighting, people, text"
},
{
"prompt": "Antique typewriter on oak desk with scattered paper, dust motes in sunbeam, nostalgic still life, shallow depth of field",
"negative_prompt": "laptop, modern office, bright colors, digital screens, human hands, damage"
}
]
},
{
"theme": "History and Retro",
"theme_zh": "历史与复古",
"examples": [
{
"prompt": "Steampunk laboratory with brass microscopes, glowing vials, Tesla coils sparking, intricate gear mechanisms, gas lamp lighting, detailed etching style",
"negative_prompt": "modern electronics, plastic, clean room, sunlight, minimalism, people, damage"
},
{
"prompt": "Samurai standing in bamboo forest, cherry blossoms falling, traditional Japanese ink wash painting, minimalistic monochrome",
"negative_prompt": "color, gun, Western armor, smile, crowd, modern clothes"
},
{
"prompt": "Egyptian pharaoh's tomb discovery moment, torchlight revealing golden artifacts, sandstone hieroglyphs, dramatic Indiana Jones style",
"negative_prompt": "modern tools, electric lighting, tourists, damage, futuristic elements, cartoon"
},
{
"prompt": "1920s speakeasy jazz club, smoke-filled atmosphere, flapper dancers, sepia tone photograph with film grain",
"negative_prompt": "modern clothing, daylight, digital devices, bright colors, empty room, destruction"
},
{
"prompt": "Viking longship sailing through glacial fjord, aurora reflecting on water, historical accuracy, epic cinematic shot",
"negative_prompt": "motorboats, tropical water, modern ships, pollution, cartoon characters, desert"
}
]
},
{
"theme": "Dark and Grotesque",
"theme_zh": "暗黑与怪诞",
"examples": [
{
"prompt": "Surreal heart made of cracked porcelain, wrapped in black thorns, blood dripping onto white roses, dark academia aesthetic, Greg Rutkowski style",
"negative_prompt": "cute, intact, healing, jewelry, cartoon, happy, bright background"
},
{
"prompt": "Ghostly figure in abandoned asylum, long exposure motion blur, green phosphorescent mist, horror film still, grainy film texture",
"negative_prompt": "colorful, daylight, beautiful face, modern clothes, sharp focus, flowers"
},
{
"prompt": "Living tapestry of screaming faces emerging from medieval castle wall, Gothic horror, Zdzisław Beksiński influence",
"negative_prompt": "bright colors, peaceful scene, modern art, intact surface, cute animals"
},
{
"prompt": "Alchemical ritual circle with inverted symbols, floating obsidian shards, candle smoke forming skulls, dark fantasy illustration",
"negative_prompt": "happy ceremony, daylight, children, healing magic, modern setting, cartoon style"
},
{
"prompt": "Bone cathedral interior with flesh pipe organs, stained glass depicting nightmares, HR Giger biomechanical style",
"negative_prompt": "normal church, sunlight, clean surfaces, people praying, bright colors"
}
]
},
{
"theme": "Technology and Digital",
"theme_zh": "科技与数码",
"examples": [
{
"prompt": "Holographic neural network in deep space, glowing data streams connecting nodes, cybernetic tree, sci-fi UI overlay, neon blue and purple, 3D render",
"negative_prompt": "organic, hand-drawn, paper, medieval, earth, animals, text, blur"
},
{
"prompt": "Quantum computer core with floating crystal processors, laser light refraction, clean futuristic lab, cinematic sci-fi, volumetric fog",
"negative_prompt": "wires, vintage, wood, people, mess, low tech, explosion"
},
{
"prompt": "Augmented reality city overlay visible through smart glasses, digital information layers floating over streets, cyberpunk UI design",
"negative_prompt": "natural landscape, historical setting, low tech, blurry interface, destruction"
},
{
"prompt": "Nanobot swarm reconstructing broken antique vase in timelapse, technology meets tradition, macro photography",
"negative_prompt": "human hands, organic growth, magic, blurry, cartoon robots, fire"
},
{
"prompt": "Singularity event visualized as fractal energy convergence, quantum foam eruption, abstract technology art, 8K resolution",
"negative_prompt": "mechanical parts, people, buildings, earth landscape, simple shapes, text"
}
]
}
]
@@ -7,5 +7,5 @@ onnxruntime>=1.22.1
pillow
prettytable
tokenizers<0.22,>=0.21
torch>=2.4.1
torch>=2.5.1
transformers>=4.54.1
python tools/run_pipe.py -m ./Flux.1-dev-new-onnx/ -p "A cat holding a sign that says hello world" --num-images-per-prompt 1 --img-size 512 --save-prefix flux_bs1_512
python tools/run_pipe.py -m ./Flux.1-dev-new-onnx/ -p "A cat holding a sign that says hello world" --num-images-per-prompt 1 --img-size 1024 --save-prefix flux_bs1_1024
python tools/run_pipe.py -m ./Flux.1-dev-new-onnx/ -p "A cat holding a sign that says hello world" --num-images-per-prompt 2 --img-size 512 --save-prefix flux_bs2_512
python tools/run_pipe.py -m ./Flux.1-dev-new-onnx/ -p "A cat holding a sign that says hello world" --num-images-per-prompt 2 --img-size 1024 --save-prefix flux_bs2_1024
# rocblas_lib_path=$1
# export MIGRAPHX_ENABLE_MIOPEN_GROUPNORM=1
# export MIGRAPHX_ENABLE_NHWC=1
# export MIGRAPHX_ENABLE_MIOPEN_CONCAT=1
# export MIGRAPHX_STABLEDIFFUSION_OPT=1
# export MIGRAPHX_ENABLE_MIOPEN_GN_LN=1
# export MIGRAPHX_ENABLE_LAYERNORM_FUSION=1
# export PADDING_MALLOC=0 # run on KME
# export HIP_VISIBLE_DEVICES=6
# export LD_LIBRARY_PATH=/public/home/zhuww/wangwf/pkgs/rocblas-install-0626/lib:$LD_LIBRARY_PATH
# export LD_LIBRARY_PATH=${rocblas_lib_path}:$LD_LIBRARY_PATH
export MIGRAPHX_TRANSPOSE_CONTIGUOUS_GEMM=1
## Loading and running a pipeline the generic way
```bash
python tools/run_pipe.py -m /path/to/models -p "the ocean in dream"
```
Script arguments:
| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| `-m` / `--model-dir` | **Required.** Path to the pipeline model directory | str | None |
| `--force-compile` | Optional. Force recompilation of the models | bool | False |
| `--num-images-per-prompt` | Optional. Number of images generated per prompt in one run | int | 1 |
| `--img-size` | Optional. Output image size; if unset, each pipeline's default image size is used | int | None |
| `-p` / `--prompt` | **Required.** Prompt describing the image content, style, generation requirements, etc. | str | None |
| `-n` / `--negative-prompt` | Optional. Negative prompt, e.g. "ugly" | str | None |
| `-t` / `--num-inference-steps` | Optional. Number of denoising steps when generating an image | int | 50 |
| `--seed` | Optional. Random seed | int | 42 |
| `--save-prefix` | Optional. Filename prefix for the saved images | str | None |
## Loading and running a pipeline with custom components
> reference: [https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview#community-components](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview#community-components)
> Community components allow users to build pipelines that may have customized components that are not a part of Diffusers. If your pipeline has custom components that Diffusers doesn’t already support, you need to provide their implementations as Python modules. These customized components could be a VAE, UNet, and scheduler. In most cases, the text encoder is imported from the Transformers library. The pipeline code itself can also be customized.
In this project we reimplemented the text_encoder, unet, vae_decoder and other components with MIGraphX as the inference backend. Besides the generic loading method `DiffusionPipeline.from_pretrained`, we can also load each custom component first and then create the pipeline instance; a conceptual sketch follows the command below. Taking sdxl as an example:
```bash
# run sdxl
python tools/run_sdxl_with_custom_components.py -m /path/to/sdxl_models -p "the ocean in dream"
```
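Conceptually, the custom-component path looks like the following minimal sketch. It uses the stock diffusers/transformers classes for clarity; in this project the MIGraphX-backed implementations from `migraphx_diffusers` would take the place of the UNet, VAE and text encoders (the model path below is illustrative):
```python
import torch
from diffusers import (AutoencoderKL, EulerDiscreteScheduler,
                       StableDiffusionXLPipeline, UNet2DConditionModel)
from transformers import (CLIPTextModel, CLIPTextModelWithProjection,
                          CLIPTokenizer)

model_dir = "/path/to/sdxl_models"  # illustrative path

# Load each component separately...
unet = UNet2DConditionModel.from_pretrained(model_dir, subfolder="unet")
vae = AutoencoderKL.from_pretrained(model_dir, subfolder="vae")
text_encoder = CLIPTextModel.from_pretrained(model_dir, subfolder="text_encoder")
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
    model_dir, subfolder="text_encoder_2")
tokenizer = CLIPTokenizer.from_pretrained(model_dir, subfolder="tokenizer")
tokenizer_2 = CLIPTokenizer.from_pretrained(model_dir, subfolder="tokenizer_2")
scheduler = EulerDiscreteScheduler.from_pretrained(model_dir, subfolder="scheduler")

# ...then assemble the pipeline from them.
pipe = StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    text_encoder_2=text_encoder_2,
    tokenizer=tokenizer,
    tokenizer_2=tokenizer_2,
    unet=unet,
    scheduler=scheduler,
)
images = pipe(prompt="the ocean in dream").images
```
Assembling the pipeline yourself is what makes it possible to swap any single component for a custom implementation while reusing the rest of the stock pipeline logic.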
Script arguments:
| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| `-m` / `--model-dir` | **Required.** Path to the sdxl model directory | str | None |
| `--force-compile` | Optional. Force recompilation of the models | bool | False |
| `--num-images-per-prompt` | Optional. Number of images generated per prompt in one run | int | 1 |
| `--img-size` | Optional. Output image size | int | 1024 |
| `-p` / `--prompt` | **Required.** Prompt describing the image content, style, generation requirements, etc. | str | None |
| `-n` / `--negative-prompt` | Optional. Negative prompt, e.g. "ugly" | str | None |
| `-t` / `--num-inference-steps` | Optional. Number of denoising steps when generating an image | int | 50 |
| `--seed` | Optional. Random seed | int | 42 |
| `--save-prefix` | Optional. Filename prefix for the saved images | str | None |
## Batch image generation
Prompts and negative prompts covering multiple themes can be collected in a single JSON file, and `tools/run_examples.py` will then generate images for all of them in batch.
```bash
# run sdxl
python tools/run_examples.py \
-m /path/to/sdxl_models \
--examples-json examples/prompts_and_negative_prompts.json \
--output-dir examples/sdxl-images-1024
```
Script arguments:
| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| `-m` / `--model-dir` | **Required.** Path to the sdxl model directory | str | None |
| `--force-compile` | Optional. Force recompilation of the models | bool | False |
| `--num-images-per-prompt` | Optional. Number of images generated per prompt in one run | int | 1 |
| `--img-size` | Optional. Output image size; if unset, each pipeline's default image size is used | int | None |
| `-t` / `--num-inference-steps` | Optional. Number of denoising steps when generating an image | int | 50 |
| `--seed` | Optional. Random seed | int | 42 |
| `--examples-json` | Optional. Path to the prompts and negative prompts file | str | examples/prompts_and_negative_prompts.json |
| `--output-dir` | Optional. Directory for saving the generated images | str | None |
The prompts and negative prompts file has the following format:
```json
[
{
"theme": "theme0 name",
"examples": [
{
"prompt": "promt0 text here",
"negative_prompt": "negative_prompt0 text here"
},
{
"prompt": "promt1 text here",
"negative_prompt": "negative_prompt1 text here"
},
...
]
},
{
"theme": "theme1 name",
"examples": [
{
"prompt": "promt0 text here",
"negative_prompt": "negative_prompt0 text here"
},
{
"prompt": "promt1 text here",
"negative_prompt": "negative_prompt1 text here"
},
...
]
},
...
]
```
Example: [../examples/prompts_and_negative_prompts.json](../examples/prompts_and_negative_prompts.json)
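For reference, a short sketch of loading and iterating over such a file (this is plain JSON handling, no project-specific API):
```python
import json

# Load the themed prompt / negative-prompt pairs.
with open("examples/prompts_and_negative_prompts.json", "r") as f:
    themes = json.load(f)

for theme in themes:
    print(f"Theme: {theme['theme']}")
    for example in theme["examples"]:
        # Each pair is passed to the pipeline as prompt / negative_prompt.
        print(f"  prompt: {example['prompt']}")
        print(f"  negative_prompt: {example['negative_prompt']}")
```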
## Profiling per-module latency
This project supports measuring the latency of each module via non-intrusive instrumentation. The general steps are:
1. Create a timer;
2. Register the functions or methods to be timed with the timer;
3. Start the timer;
4. Run the functions or methods to be timed;
5. Print the statistics.
A minimal usage example:
```python
import random
import time

from migraphx_diffusers import AutoTimer


def sleep_func(sleep_seconds=1):
    time.sleep(sleep_seconds)


class SleepClass:
    def __init__(self):
        self.min_seconds = 1
        self.max_seconds = 5

    def random_sleep(self):
        time.sleep(random.randint(self.min_seconds, self.max_seconds))

    def __call__(self, sleep_seconds=1):
        time.sleep(sleep_seconds)


obj = SleepClass()
t = AutoTimer()  # step1

# step2
t.add_target(sleep_func, key="sleep_func")
t.add_target(obj.random_sleep, key="random_sleep")
t.add_target(obj, key="__call__")

t.start_work()  # step3

# step4
for i in range(10):
    sleep_func()
    obj()
    if i % 3 == 0:
        obj.random_sleep()

t.summary()  # step5
```
The output looks like this:
```
+--------------------------------------------------------------------+
|                            Test Latency                            |
+--------------+------+----------+----------+-----------+------------+
|    Module    | Runs | Max (ms) | Min (ms) | Mean (ms) | Mean (fps) |
+--------------+------+----------+----------+-----------+------------+
|  sleep_func  |  10  | 1001.06  | 1001.02  |  1001.04  |    1.0     |
|   __call__   |  10  | 1001.07  | 1000.06  |  1000.94  |    1.0     |
| random_sleep |  4   |  4004.1  | 1001.05  |  2252.33  |    0.44    |
+--------------+------+----------+----------+-----------+------------+
```
To collect end-to-end and per-component performance data for sdxl or sd2.1:
```bash
python tools/time_count.py -m /path/to/sdxl_models
```
Script arguments:
| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| `-m` / `--model-dir` | **Required.** Path to the sdxl model directory | str | None |
| `--force-compile` | Optional. Force recompilation of the models | bool | False |
| `--num-images-per-prompt` | Optional. Number of images generated per prompt in one run | int | 1 |
| `--img-size` | Optional. Output image size; if unset, each pipeline's default image size is used | int | None |
| `-t` / `--num-inference-steps` | Optional. Number of denoising steps when generating an image | int | 50 |
| `--num-warmup-loops` | Optional. Number of warmup iterations | int | 1 |
| `--num-count-loops` | Optional. Number of timed iterations | int | 100 |
| `--out-csv-file` | Optional. Path of the CSV file for saving performance data | str | ./perf-{date}-{time}.csv |
## SD2.1 end-to-end performance test
```bash
python tools/run_sd2_1.py /path/to/sd2.1_models
```
Script arguments:
| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| `model-dir` | **Positional.** Path to the sd2.1 model directory | str | None |
| `--result-dir` | Optional. Directory for storing the generated images | str | ./results |
The test scenarios are as follows; a sketch of the test loop appears after the list:
+ batchsize: 1, 2, 4, 8
+ image_size: 512
+ num_inference_steps: 20
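A minimal sketch of that test loop, mirroring the loading pattern used by the other tools in this repository (the model path is illustrative, and reloading the pipeline per batch size is an assumption based on how the compiled batch is configured):
```python
import torch
from diffusers import DiffusionPipeline
import migraphx_diffusers  # registers the MIGraphX-backed components
from migraphx_diffusers import get_name_and_migraphx_config

model_dir = "/path/to/sd2.1_models"  # illustrative path
pipe_name, migraphx_config = get_name_and_migraphx_config(model_dir)
migraphx_config['common_args']['img_size'] = 512

for batch in (1, 2, 4, 8):
    # Assumption: the MIGraphX programs are compiled per batch size,
    # so the pipeline is rebuilt for each scenario.
    migraphx_config['common_args']['batch'] = batch
    pipe = DiffusionPipeline.from_pretrained(
        model_dir, torch_dtype=torch.float16, migraphx_config=migraphx_config)
    pipe.to("cuda")
    images = pipe(prompt="a photo of an astronaut riding a horse on mars",
                  num_images_per_prompt=batch,
                  num_inference_steps=20).images
```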
## Model accuracy evaluation
Text-to-image models are usually evaluated with the CLIP score. First, prepare the dataset and the multimodal model:
```bash
# download the dataset
wget https://raw.githubusercontent.com/google-research/parti/main/PartiPrompts.tsv --no-check-certificate
# download the model
mkdir ./openai
huggingface-cli download openai/clip-vit-base-patch16 --local-dir ./openai/clip-vit-base-patch16 --local-dir-use-symlinks False
```
Generate images from the prompts in the dataset:
```bash
python tools/gen_p2_images.py -m /path/to/models --num-images-per-prompt 4 -p ./PartiPrompts.tsv --save-dir ./p2_images
```
Evaluate the generated results:
```bash
python tools/evaluate.py -m ./openai/clip-vit-base-patch16 -d ./p2_images
```
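Under the hood, `tools/evaluate.py` computes the CLIP score with torchmetrics. A minimal sketch of the metric itself (the random tensor is a stand-in for real generated images):
```python
import torch
from torchmetrics.multimodal import CLIPScore

metric = CLIPScore(model_name_or_path="./openai/clip-vit-base-patch16")

# Random uint8 images in (N, C, H, W) layout stand in for generated outputs.
images = torch.randint(0, 255, (2, 3, 512, 512), dtype=torch.uint8)
prompts = ["a cat holding a sign that says hello world", "the ocean in dream"]

score = metric(images, prompts)  # mean CLIP score over the batch
print(f"mean CLIP score: {score.item():.4f}")
```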
from collections import defaultdict
import json
import os
import os.path as osp
import cv2
import numpy as np
from prettytable import PrettyTable
import torch
import tqdm
from torchmetrics.multimodal import CLIPScore
from torchmetrics.functional.multimodal.clip_score import _clip_score_update
class P2CLIPScore(CLIPScore):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.category2scores = defaultdict(list)
self.category2nprompts = defaultdict(int)
self.category2nimages = defaultdict(int)
def process(self, p2_images_dir):
prompt_dirs = []
for cat_dir_name in os.listdir(p2_images_dir):
for prompt_dir_name in os.listdir(osp.join(p2_images_dir, cat_dir_name)):
prompt_dir = osp.join(p2_images_dir, cat_dir_name, prompt_dir_name)
prompt_dirs.append(prompt_dir)
print("Processing...")
for prompt_dir in tqdm.tqdm(prompt_dirs):
prompt_json = osp.join(prompt_dir, "prompt_info.json")
with open(prompt_json, "r") as f:
prompt_info = json.load(f)
category = prompt_info["category"]
cat_dir_name = prompt_dir.split("/")[-2]
assert cat_dir_name == category.replace(" ", "").replace("&", "_")
imgs = []
for file_name in os.listdir(prompt_dir):
if not file_name.endswith(".png"):
continue
image_path = osp.join(prompt_dir, file_name)
img = cv2.imread(image_path)[None, ...]
imgs.append(img)
assert len(imgs) >= 1
scores, _ = _clip_score_update(
[prompt_info["prompt_text"]] * len(imgs),
torch.from_numpy(np.concatenate(imgs, 0).transpose(0, 3, 1, 2)),
self.model,
self.processor
)
# Keep only the best-scoring image per prompt (max over its generated images).
self.category2scores["All"].append(scores.max().item())
self.category2scores[category].append(scores.max().item())
self.category2nprompts["All"] += 1
self.category2nprompts[category] += 1
self.category2nimages["All"] += len(imgs)
self.category2nimages[category] += len(imgs)
def compute(self, output_json=None):
pt = PrettyTable()
pt.title = "Evaluation Results of PartiPrompts Dataset"
pt.field_names = ["Category", "Num Prompts", "Num Images", "Mean CLIP Score"]
for category, scores in self.category2scores.items():
num_prompts = self.category2nprompts[category]
num_images = self.category2nimages[category]
mean_score = sum(scores) / len(scores)
pt.add_row([category, num_prompts, num_images, round(mean_score, 4)])
print(pt)
if output_json is not None:
with open(output_json, "w") as f:
f.write(pt.get_json_string())
def main():
import argparse
parser = argparse.ArgumentParser(
"Evaluate text2image results of PartiPrompts dataset")
parser.add_argument("-m", "--model-dir",
type=str,
required=True,
help="The path to the model directory.")
parser.add_argument("-d", "--data-dir",
type=str,
required=True,
help="The path to the evaluation data directory.")
parser.add_argument("-o", "--output-json",
type=str,
default=None,
help="Output json file path.")
args = parser.parse_args()
p2_clip_score = P2CLIPScore(args.model_dir)
p2_clip_score.process(args.data_dir)
p2_clip_score.compute(args.output_json)
if __name__ == "__main__":
main()
import argparse
import os
import os.path as osp
import shutil
import onnx
import torch
from transformers import (CLIPTextModel, T5EncoderModel)
from diffusers import FluxTransformer2DModel, AutoencoderKL
def get_local_path(local_dir, model_dir):
model_local_dir = os.path.join(local_dir, model_dir)
if not os.path.exists(model_local_dir):
os.makedirs(model_local_dir)
return model_local_dir
def gather_weights_to_one_file(onnx_path):
onnx_model = onnx.load(onnx_path)
onnx_model_without_data = onnx.load(onnx_path, load_external_data=False)
os.remove(onnx_path) # remove old model file
# remove external data file
dir_path = osp.dirname(onnx_path)
for ini in onnx_model_without_data.graph.initializer:
for ed in ini.external_data:
external_data_path = osp.join(dir_path, ed.value)
if osp.isfile(external_data_path):
os.remove(external_data_path)
for node in onnx_model_without_data.graph.node:
if node.op_type != "Constant":
continue
for attr in node.attribute:
external_data_path = osp.join(
dir_path, attr.t.name.replace('/', '_').replace(':', '_'))
if osp.isfile(external_data_path):
os.remove(external_data_path)
onnx.save(onnx_model,
onnx_path,
save_as_external_data=True,
all_tensors_to_one_file=True,
location="model.onnx.data")
def copy_files(local_dir, save_dir, overwrite=True):
if overwrite or not osp.is_exist(osp.join(save_dir, "scheduler")):
shutil.copytree(osp.join(local_dir, "scheduler"),
osp.join(save_dir, "scheduler"),
dirs_exist_ok=True)
if overwrite or not osp.is_exist(osp.join(save_dir, "tokenizer")):
shutil.copytree(osp.join(local_dir, "tokenizer"),
osp.join(save_dir, "tokenizer"),
dirs_exist_ok=True)
if overwrite or not osp.is_exist(osp.join(save_dir, "tokenizer_2")):
shutil.copytree(osp.join(local_dir, "tokenizer_2"),
osp.join(save_dir, "tokenizer_2"),
dirs_exist_ok=True)
if overwrite or not osp.exists(osp.join(save_dir, 'model_index.json')):
shutil.copy(osp.join(local_dir, 'model_index.json'),
osp.join(save_dir, 'model_index.json'))
for sub_dir in ['text_encoder', 'text_encoder_2', 'transformer', 'vae']:
if overwrite or not osp.exists(
osp.join(save_dir, sub_dir, 'config.json')):
shutil.copy(osp.join(local_dir, sub_dir, 'config.json'),
osp.join(save_dir, sub_dir, 'config.json'))
def export_clip(local_dir,
model_dir="text_encoder",
save_dir=None,
torch_dtype=torch.float32):
save_dir = save_dir or local_dir
clip_save_dir = get_local_path(save_dir, model_dir)
onnx_path = os.path.join(clip_save_dir, "model.onnx")
bs = 1
max_len = 77
sample_inputs = (torch.zeros(bs, max_len, dtype=torch.int32), )
input_names = ["input_ids"]
model = CLIPTextModel.from_pretrained(local_dir,
subfolder=model_dir,
torch_dtype=torch_dtype)
output_names = ["text_embeddings"]
dynamic_axes = {"input_ids": {0: 'B'}, "text_embeddings": {0: 'B'}}
# CLIP export requires nightly pytorch due to bug in onnx parser
with torch.inference_mode():
torch.onnx.export(model,
sample_inputs,
onnx_path,
export_params=True,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes)
assert os.path.isfile(onnx_path)
gather_weights_to_one_file(onnx_path)
print(f"Success export clip model: {onnx_path}")
return onnx_path
def export_t5(local_dir,
model_dir="text_encoder_2",
save_dir=None,
torch_dtype=torch.float32):
save_dir = save_dir or local_dir
t5_save_dir = get_local_path(save_dir, model_dir)
onnx_path = os.path.join(t5_save_dir, "model.onnx")
bs = 1
max_len = 512
sample_inputs = (torch.zeros(bs, max_len, dtype=torch.int32), )
input_names = ["input_ids"]
model = T5EncoderModel.from_pretrained(local_dir,
subfolder=model_dir,
torch_dtype=torch_dtype)
output_names = ["text_embeddings"]
dynamic_axes = {"input_ids": {0: 'B'}, "text_embeddings": {0: 'B'}}
with torch.inference_mode():
torch.onnx.export(model,
sample_inputs,
onnx_path,
export_params=True,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes)
assert os.path.isfile(onnx_path)
gather_weights_to_one_file(onnx_path)
print(f"Success export t5 model: {onnx_path}")
return onnx_path
# The following decorators apply an fp16 inference patch to the transformer
# blocks. Note that we do not export fp16 weights directly to ONNX; this
# allows migraphx to perform optimizations before quantizing down to fp16,
# which gives better accuracy than exporting fp16 directly to ONNX.
def transformer_block_clip_wrapper(fn):
def new_forward(*args, **kwargs):
encoder_hidden_states, hidden_states = fn(*args, **kwargs)
return encoder_hidden_states.clip(-65504, 65504), hidden_states
return new_forward
def single_transformer_block_clip_wrapper(fn):
def new_forward(*args, **kwargs):
hidden_states = fn(*args, **kwargs)
return hidden_states.clip(-65504, 65504)
return new_forward
def add_output_clippings_for_fp16(model):
for b in model.transformer_blocks:
b.forward = transformer_block_clip_wrapper(b.forward)
for b in model.single_transformer_blocks:
b.forward = single_transformer_block_clip_wrapper(b.forward)
def export_transformer(local_dir,
model_dir="transformer",
save_dir=None,
torch_dtype=torch.float32,
fp16=True):
save_dir = save_dir or local_dir
transformer_save_dir = get_local_path(save_dir, model_dir)
onnx_path = os.path.join(transformer_save_dir, "model.onnx")
bs = 1
img_height = 1024
img_width = 1024
compression_factor = 8
latent_h = img_height // compression_factor
latent_w = img_width // compression_factor
max_len = 512
config = FluxTransformer2DModel.load_config(local_dir,
subfolder=model_dir)
sample_inputs = (
torch.randn(bs, (latent_h // 2) * (latent_w // 2),
config["in_channels"],
dtype=torch_dtype),
torch.randn(bs,
max_len,
config['joint_attention_dim'],
dtype=torch_dtype),
torch.randn(bs, config['pooled_projection_dim'], dtype=torch_dtype),
torch.tensor([1.] * bs, dtype=torch_dtype),
torch.randn((latent_h // 2) * (latent_w // 2), 3, dtype=torch_dtype),
torch.randn(max_len, 3, dtype=torch_dtype),
torch.tensor([1.] * bs, dtype=torch_dtype),
)
input_names = [
'hidden_states', 'encoder_hidden_states', 'pooled_projections',
'timestep', 'img_ids', 'txt_ids', 'guidance'
]
model = FluxTransformer2DModel.from_pretrained(local_dir,
subfolder=model_dir,
torch_dtype=torch_dtype)
if fp16:
print("applying fp16 clip workarounds to transformer")
add_output_clippings_for_fp16(model)
output_names = ["latent"]
dynamic_axes = {
'hidden_states': {
0: 'B',
1: 'latent_dim'
},
'encoder_hidden_states': {
0: 'B',
1: 'L'
},
'pooled_projections': {
0: 'B'
},
'timestep': {
0: 'B'
},
'img_ids': {
0: 'latent_dim'
},
'txt_ids': {
0: 'L'
},
'guidance': {
0: 'B'
},
}
with torch.inference_mode():
torch.onnx.export(model,
sample_inputs,
onnx_path,
export_params=True,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes)
assert os.path.isfile(onnx_path)
gather_weights_to_one_file(onnx_path)
print(f"Success export transformer model: {onnx_path}")
return onnx_path
def export_vae(local_dir,
model_dir="vae",
save_dir=None,
torch_dtype=torch.float32):
save_dir = save_dir or local_dir
vae_save_dir = get_local_path(save_dir, model_dir)
onnx_path = os.path.join(vae_save_dir, "model.onnx")
config = AutoencoderKL.load_config(local_dir, subfolder=model_dir)
bs=1
latent_channels = config['latent_channels']
img_height = 1024
img_width = 1024
compression_factor = 8
latent_h = img_height // compression_factor
latent_w = img_width // compression_factor
sample_inputs = (torch.randn(bs,
latent_channels,
latent_h,
latent_w,
dtype=torch_dtype), )
input_names = ["latent"]
model = AutoencoderKL.from_pretrained(local_dir,
subfolder=model_dir,
torch_dtype=torch_dtype)
model.forward = model.decode
output_names = ["images"]
dynamic_axes = {
'latent': {
0: 'B',
2: 'H',
3: 'W'
},
'images': {
0: 'B',
2: '8H',
3: '8W'
}
}
with torch.inference_mode():
torch.onnx.export(model,
sample_inputs,
onnx_path,
export_params=True,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes)
assert os.path.isfile(onnx_path)
gather_weights_to_one_file(onnx_path)
print(f"Success export vae_decoder model: {onnx_path}")
return onnx_path
def parse_args():
parser = argparse.ArgumentParser(description="export ONNX models")
parser.add_argument("--local-dir",
type=str,
required=True,
help="local directory containing the model")
parser.add_argument("--save-dir",
type=str,
default=None,
help="the directory for saving ONNX models")
args = parser.parse_args()
if args.save_dir is None:
args.save_dir = args.local_dir
return args
def main():
args = parse_args()
local_dir = args.local_dir
save_dir = args.save_dir
os.makedirs(save_dir, exist_ok=True)
export_clip(local_dir, save_dir=save_dir)
export_t5(local_dir, save_dir=save_dir)
export_transformer(local_dir, save_dir=save_dir)
export_vae(local_dir, save_dir=save_dir)
if save_dir != local_dir:
copy_files(local_dir, save_dir, overwrite=True)
if __name__ == "__main__":
main()
from collections import namedtuple
import csv
import json
import os
import os.path as osp
from diffusers import DiffusionPipeline
import migraphx_diffusers
from migraphx_diffusers import get_name_and_migraphx_config
import torch
def parse_args():
from argparse import ArgumentParser
parser = ArgumentParser(description="SDXL inference with migraphx backend")
#=========================== mdoel load and compile ========================
parser.add_argument(
"-m",
"--model-dir",
type=str,
required=True,
help="Path to local model directory.",
)
parser.add_argument(
"--force-compile",
action="store_true",
default=False,
help="Ignore existing .mxr files and override them",
)
parser.add_argument(
"--img-size",
type=int,
default=None,
help="output image size",
)
parser.add_argument(
"--num-images-per-prompt",
type=int,
default=1,
help="The number of images to generate per prompt."
)
# --------------------------------------------------------------------------
# =============================== generation ===============================
parser.add_argument(
"-t",
"--num-inference-steps",
type=int,
default=None,
help="Number of iteration steps",
)
parser.add_argument(
"--true-cfg-scale",
default=None,
type=float,
help="Olny for flux pipeline. When > 1.0 and a provided `negative_prompt`, " \
"enables true classifier-free guidance."
)
parser.add_argument(
"--guidance-scale",
default=None,
type=float,
help="Guidance scale is enabled by setting `guidance_scale > 1`. Higher " \
"guidance scale encourages to generate images that are closely linked to " \
"the text `prompt`, usually at the expense of lower image quality."
)
parser.add_argument(
"-s",
"--seed",
type=int,
default=42,
help="Random seed",
)
# --------------------------------------------------------------------------
# ================================ control =================================
parser.add_argument(
"-p",
"--parti-prompts-file",
type=str,
required=True,
help="Number of iteration steps",
)
parser.add_argument(
"--count-submodels",
action="store_true",
help="count running time for each submodel",
)
parser.add_argument(
"--save-dir",
type=str,
default=None,
help="Path to save images",
)
parser.add_argument(
"--resume",
action="store_true",
help="resume image generation",
)
# --------------------------------------------------------------------------
args = parser.parse_args()
return args
def parse_prompts(parti_prompts_file):
Prompt = namedtuple("Prompt",
["prompt_text", "category", "challenge", "note"])
prompt_list = []
with open(parti_prompts_file, "r") as f:
csv_reader = csv.reader(f, delimiter="\t")
for i, row in enumerate(csv_reader):
if i == 0:
continue
prompt_list.append(Prompt(*row))
return prompt_list
def main():
args = parse_args()
pipe_name, migraphx_config = get_name_and_migraphx_config(args.model_dir)
if args.img_size is not None:
migraphx_config['common_args']['img_size'] = args.img_size
migraphx_config['common_args'].update(dict(
batch=args.num_images_per_prompt,
force_compile=args.force_compile,
))
pipe = DiffusionPipeline.from_pretrained(
args.model_dir,
torch_dtype=torch.float16,
migraphx_config=migraphx_config
)
pipe.to("cuda")
call_kwargs = {}
if args.num_inference_steps is not None:
call_kwargs['num_inference_steps'] = args.num_inference_steps
if args.guidance_scale is not None:
call_kwargs['guidance_scale'] = args.guidance_scale
if args.true_cfg_scale is not None:
assert pipe_name == 'flux.1-dev', \
"`true_cfg_scale` is only valid for flux.1-dev pipeline!"
call_kwargs['true_cfg_scale'] = args.true_cfg_scale
if args.seed is not None:
call_kwargs['generator'] = torch.Generator("cuda").manual_seed(args.seed)
os.makedirs(args.save_dir, exist_ok=True)
print("Generating image...")
for i, prompt in enumerate(parse_prompts(args.parti_prompts_file)):
sub_dir = osp.join(args.save_dir,
prompt.category.replace(" ", "").replace("&", "_"),
f"prompt_{i:0>4d}")
prompt_json = osp.join(sub_dir, "prompt_info.json")
# =========================== resume =========================
if args.resume:
check_file_list = [osp.join(sub_dir, f"image_{j:0>2d}.png")
for j in range(args.num_images_per_prompt)]
check_file_list.append(prompt_json)
if all([osp.exists(f) for f in check_file_list]):
print(f"Skipping prompt {i}: \"{prompt.prompt_text}\"")
continue
# =========================== generate image =========================
print(f"Processing prompt {i}: \"{prompt.prompt_text}\"")
if not osp.isdir(sub_dir):
os.makedirs(sub_dir, exist_ok=True)
with open(prompt_json, "w") as f:
json.dump(prompt._asdict(), f)
images = pipe(
prompt=prompt.prompt_text,
**call_kwargs
).images
for j, image in enumerate(images):
save_path = osp.join(sub_dir, f"{j:0>2d}.png")
image.save(save_path)
print(f"Generated image: {save_path}")
if __name__ == "__main__":
main()
import json
import os
import os.path as osp
from diffusers import DiffusionPipeline
import migraphx_diffusers
from migraphx_diffusers import get_name_and_migraphx_config
import torch
def parse_args():
from argparse import ArgumentParser
parser = ArgumentParser(description="SDXL inference with migraphx backend")
#=========================== mdoel load and compile ========================
parser.add_argument(
"-m",
"--model-dir",
type=str,
required=True,
help="Path to local model directory.",
)
parser.add_argument(
"--force-compile",
action="store_true",
default=False,
help="Ignore existing .mxr files and override them",
)
parser.add_argument(
"--num-images-per-prompt",
type=int,
default=1,
help="The number of images to generate per prompt."
)
parser.add_argument(
"--img-size",
type=int,
default=None,
help="output image size",
)
# --------------------------------------------------------------------------
# =============================== generation ===============================
parser.add_argument(
"-t",
"--num-inference-steps",
type=int,
default=None,
help="Number of iteration steps",
)
parser.add_argument(
"--true-cfg-scale",
default=None,
type=float,
help="Olny for flux pipeline. When > 1.0 and a provided `negative_prompt`, " \
"enables true classifier-free guidance."
)
parser.add_argument(
"--guidance-scale",
default=None,
type=float,
help="Guidance scale is enabled by setting `guidance_scale > 1`. Higher " \
"guidance scale encourages to generate images that are closely linked to " \
"the text `prompt`, usually at the expense of lower image quality."
)
parser.add_argument(
"-s",
"--seed",
type=int,
default=42,
help="Random seed",
)
# --------------------------------------------------------------------------
parser.add_argument(
"--examples-json",
type=str,
default="./examples/prompts_and_negative_prompts.json",
help="Prompts and negative prompts data path",
)
parser.add_argument(
"--output-dir",
type=str,
default=None,
help="Path to save images",
)
args = parser.parse_args()
return args
def parse_prompts(examples_json):
with open(examples_json, 'r') as f:
prompt_data = json.load(f)
return prompt_data
def main():
args = parse_args()
pipe_name, migraphx_config = get_name_and_migraphx_config(args.model_dir)
if args.output_dir is None:
args.output_dir = f"./examples/{pipe_name}-images-{args.img_size}"
if args.img_size is not None:
migraphx_config['common_args']['img_size'] = args.img_size
migraphx_config['common_args'].update(dict(
batch=args.num_images_per_prompt,
force_compile=args.force_compile,
))
pipe = DiffusionPipeline.from_pretrained(
args.model_dir,
torch_dtype=torch.float16,
migraphx_config=migraphx_config
)
pipe.to("cuda")
call_kwargs = {}
if args.num_inference_steps is not None:
call_kwargs['num_inference_steps'] = args.num_inference_steps
if args.guidance_scale is not None:
call_kwargs['guidance_scale'] = args.guidance_scale
if args.true_cfg_scale is not None:
assert pipe_name == 'flux.1-dev', \
"`true_cfg_scale` is only valid for flux.1-dev pipeline!"
call_kwargs['true_cfg_scale'] = args.true_cfg_scale
if args.seed is not None:
call_kwargs['generator'] = torch.Generator("cuda").manual_seed(args.seed)
prompt_data = parse_prompts(args.examples_json)
cnt = 0
for i, d in enumerate(prompt_data):
theme = d["theme"]
pairs = d["examples"]
sub_dir = osp.join(args.output_dir,
f"{i}-{theme.title().replace(' ', '')}")
os.makedirs(sub_dir, exist_ok=True)
for j, pair in enumerate(pairs):
print(f"Generating image {cnt}...")
prompt = pair["prompt"]
negative_prompt = pair["negative_prompt"]
print(f"Prompt: {prompt}")
print(f"negative Prompt: {negative_prompt}")
images = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
**call_kwargs
).images
for k, image in enumerate(images):
save_path = osp.join(
sub_dir, f"theme_{i}_example_{j}_image_{k}.png")
image.save(save_path)
print(f"Image saved: {save_path}")
cnt += 1
print(f"Total {cnt} images Generated!")
if __name__ == "__main__":
main()
import json
import os.path as osp
from diffusers import DiffusionPipeline
import migraphx_diffusers
from migraphx_diffusers import get_name_and_migraphx_config
import torch
def parse_args():
from argparse import ArgumentParser
parser = ArgumentParser(description="SDXL inference with migraphx backend")
#=========================== mdoel load and compile ========================
parser.add_argument(
"-m",
"--model-dir",
type=str,
required=True,
help="Path to local model directory.",
)
parser.add_argument(
"--force-compile",
action="store_true",
default=False,
help="Ignore existing .mxr files and override them",
)
parser.add_argument(
"--img-size",
type=int,
default=None,
help="output image size",
)
parser.add_argument(
"--num-images-per-prompt",
type=int,
default=1,
help="The number of images to generate per prompt."
)
# --------------------------------------------------------------------------
# =============================== generation ===============================
parser.add_argument(
"-p",
"--prompt",
type=str,
required=True,
help="Prompt for describe image content, style and so on."
)
parser.add_argument(
"-n",
"--negative-prompt",
type=str,
default=None,
help="Negative prompt",
)
parser.add_argument(
"-t",
"--num-inference-steps",
type=int,
default=None,
help="Number of iteration steps",
)
parser.add_argument(
"--true-cfg-scale",
default=None,
type=float,
help="Olny for flux pipeline. When > 1.0 and a provided `negative_prompt`, " \
"enables true classifier-free guidance."
)
parser.add_argument(
"--guidance-scale",
default=None,
type=float,
help="Guidance scale is enabled by setting `guidance_scale > 1`. Higher " \
"guidance scale encourages to generate images that are closely linked to " \
"the text `prompt`, usually at the expense of lower image quality."
)
parser.add_argument(
"-s",
"--seed",
type=int,
default=42,
help="Random seed",
)
parser.add_argument(
"--save-prefix",
type=str,
default=None,
help="Prefix of path for saving results",
)
# --------------------------------------------------------------------------
args = parser.parse_args()
return args
def main():
args = parse_args()
pipe_name, migraphx_config = get_name_and_migraphx_config(args.model_dir)
if args.save_prefix is None:
args.save_prefix = f"./{pipe_name}_output"
if args.img_size is not None:
migraphx_config['common_args']['img_size'] = args.img_size
migraphx_config['common_args'].update(dict(
batch=args.num_images_per_prompt,
force_compile=args.force_compile,
))
pipe = DiffusionPipeline.from_pretrained(
args.model_dir,
torch_dtype=torch.float16,
migraphx_config=migraphx_config
)
pipe.to("cuda")
call_kwargs = {}
if args.num_inference_steps is not None:
call_kwargs['num_inference_steps'] = args.num_inference_steps
if args.guidance_scale is not None:
call_kwargs['guidance_scale'] = args.guidance_scale
if args.true_cfg_scale is not None:
assert pipe_name == 'flux.1-dev', \
"`true_cfg_scale` is only valid for flux.1-dev pipeline!"
call_kwargs['true_cfg_scale'] = args.true_cfg_scale
if args.seed is not None:
call_kwargs['generator'] = torch.Generator("cuda").manual_seed(args.seed)
print("Generating image...")
images = pipe(
prompt=args.prompt,
negative_prompt=args.negative_prompt,
**call_kwargs
).images
for i, image in enumerate(images):
save_path = f"{args.save_prefix}_{i}.png"
image.save(save_path)
print(f"Generated image: {save_path}")
if __name__ == "__main__":
main()
import json
import os.path as osp
import time
from diffusers import DiffusionPipeline
import migraphx_diffusers
from migraphx_diffusers import AutoTimer, get_name_and_migraphx_config
import torch
def parse_args():
date_str = time.strftime("%Y%m%d-%H%M%S", time.localtime())
from argparse import ArgumentParser
parser = ArgumentParser(description="SDXL inference with migraphx backend")
#=========================== mdoel load and compile ========================
parser.add_argument(
"-m",
"--model-dir",
type=str,
required=True,
help="Path to local model directory.",
)
parser.add_argument(
"--force-compile",
action="store_true",
default=False,
help="Ignore existing .mxr files and override them",
)
parser.add_argument(
"--img-size",
type=int,
default=None,
help="output image size",
)
parser.add_argument(
"--num-images-per-prompt",
type=int,
default=1,
help="The number of images to generate per prompt."
)
# --------------------------------------------------------------------------
# =============================== generation ===============================
parser.add_argument(
"-t",
"--num-inference-steps",
type=int,
default=50,
help="Number of iteration steps",
)
parser.add_argument(
"--out-csv-file",
type=str,
default=f"./perf-{date_str}.csv",
help="Prefix of path for saving results",
)
# --------------------------------------------------------------------------
# =============================== time count ===============================
parser.add_argument(
"--count-submodels",
action="store_true",
help="count running time for each submodel",
)
parser.add_argument(
"--num-warmup-loops",
type=int,
default=1,
help="warmup loops",
)
parser.add_argument(
"--num-count-loops",
type=int,
default=100,
help="time count loops",
)
# --------------------------------------------------------------------------
args = parser.parse_args()
return args
def get_default_prompt(pipe_name):
negative_prompt = "ugly"
if pipe_name == 'sd2.1':
prompt = "a photo of an astronaut riding a horse on mars"
elif pipe_name == 'sdxl':
prompt = "An astronaut riding a green horse", None
elif pipe_name == 'flux.1-dev':
prompt = "A cat holding a sign that says hello world"
else:
raise ValueError(f"{pipe_name} is not supported!")
return prompt, negative_prompt
def set_timer(timer, pipe, pipe_name, count_submodels=False):
timer.add_target(pipe, key="end2end")
if not count_submodels:
return
if pipe_name == 'sd2.1':
timer.add_targets([
(pipe.text_encoder, "text_encoder"),
(pipe.unet, "unet"),
(pipe.vae.decode, "vae_decoder")
])
elif pipe_name == 'sdxl':
timer.add_targets([
(pipe.text_encoder, "text_encoder"),
(pipe.text_encoder_2, "text_encoder_2"),
(pipe.unet, "unet"),
(pipe.vae.decode, "vae_decoder")
])
elif pipe_name == 'flux.1-dev':
timer.add_targets([
(pipe.text_encoder, "text_encoder"),
(pipe.text_encoder_2, "text_encoder_2"),
(pipe.transformer, "transformer"),
(pipe.vae.decode, "vae_decoder")
])
else:
raise ValueError(f"{pipe_name} is not supported!")
def test_latency(pipe, timer, prompt, negative_prompt=None, batch=1,
num_inference_steps=50, num_warmup_loops=1,
num_count_loops=100, title=None, out_csv_file=None,
**call_kwargs):
date_str = time.strftime("%Y%m%d-%H%M%S", time.localtime())
if not out_csv_file:
out_csv_file = f"./perf-{date_str}.csv"
for i in range(num_warmup_loops + num_count_loops):
if i == num_warmup_loops:
timer.start_work()
pipe(prompt=prompt,
negative_prompt=negative_prompt,
num_inference_steps=num_inference_steps,
**call_kwargs)
table = timer.summary(batchsize=batch, title=title)
with open(out_csv_file, 'a') as f:
f.write(table.get_csv_string())
timer.clear()
timer.finish_work()
def main():
args = parse_args()
pipe_name, migraphx_config = get_name_and_migraphx_config(args.model_dir)
assert pipe_name in ['sdxl', 'sd2.1', 'flux.1-dev'], \
"Only support (1)SDXL (2)SD2.1 (3)Flux.1-dev!"
if args.img_size is not None:
migraphx_config['common_args']['img_size'] = args.img_size
migraphx_config['common_args'].update(dict(
batch=args.num_images_per_prompt,
force_compile=args.force_compile,
))
pipe = DiffusionPipeline.from_pretrained(
args.model_dir,
torch_dtype=torch.float16,
migraphx_config=migraphx_config
)
pipe.to("cuda")
t = AutoTimer()
set_timer(t, pipe, pipe_name, count_submodels=args.count_submodels)
prompt, negative_prompt = get_default_prompt(pipe_name)
test_latency(pipe, t, prompt,
batch=args.num_images_per_prompt,
num_inference_steps=args.num_inference_steps,
num_warmup_loops=args.num_warmup_loops,
num_count_loops=args.num_count_loops,
title=f"{pipe_name} Latency (Only Prompt)",
out_csv_file=args.out_csv_file)
if pipe_name == 'flux.1-dev':
test_latency(pipe, t, prompt,
negative_prompt=negative_prompt,
batch=args.num_images_per_prompt,
num_inference_steps=args.num_inference_steps,
num_warmup_loops=args.num_warmup_loops,
num_count_loops=args.num_count_loops,
title=f"{pipe_name} Latency (Prompt + NegativePrompt)",
out_csv_file=args.out_csv_file,
true_cfg_scale=2.0)
if __name__ == "__main__":
main()