# app.py - Gradio demo for PhotoMaker V2 with optional T2I-Adapter sketch control.
import torch
import torchvision.transforms.functional as TF
import numpy as np
import random
import os
import sys

from diffusers.utils import load_image
from diffusers import EulerDiscreteScheduler, T2IAdapter

import gradio as gr

from pipeline_t2i_adapter import PhotoMakerStableDiffusionXLAdapterPipeline
from face_utils import FaceAnalysis2, analyze_faces

from style_template import styles
from aspect_ratio_template import aspect_ratios

# Global configuration shared by the rest of this module.
# Identifier of the base model handed to the pipeline's from_pretrained() below.
base_model_path = 'SG161222/RealVisXL_V4.0'
# Face analyser restricted to the 'detection' and 'recognition' modules; it is
# used by generate_image() to obtain one identity embedding per uploaded image.
# NOTE(review): the CPU provider is listed before the CUDA provider - confirm
# this ordering is intentional if GPU inference is expected here.
face_detector = FaceAnalysis2(providers=['CPUExecutionProvider', 'CUDAExecutionProvider'],
                              allowed_modules=['detection', 'recognition'])
face_detector.prepare(ctx_id=0, det_size=(640, 640))

# Choose the torch device: CUDA when available, Apple MPS on macOS when
# available, otherwise CPU.
try:
    if torch.cuda.is_available():
        device = "cuda"
    elif sys.platform == "darwin" and torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
except Exception:
    # Best-effort probe: any failure while querying the backends falls back to
    # CPU. `Exception` (rather than a bare `except`) keeps KeyboardInterrupt
    # and SystemExit propagating normally.
    device = "cpu"

# Upper bound for the seed slider and for randomly drawn seeds (int32 max).
MAX_SEED = np.iinfo(np.int32).max
# Choices offered in the style dropdown, taken from the style template keys.
STYLE_NAMES = list(styles.keys())
# Style preselected in the UI and used as fallback by apply_style().
DEFAULT_STYLE_NAME = "Photographic (Default)"
# Choices offered in the aspect-ratio dropdown; the first entry is the default.
ASPECT_RATIO_LABELS = list(aspect_ratios)
DEFAULT_ASPECT_RATIO = ASPECT_RATIO_LABELS[0]

# Initial state of the "enable doodle" checkbox.
enable_doodle_arg = False

# Weight precision: bfloat16 only when CUDA is present and reports bf16
# support; every other configuration uses float16.
_bf16_ok = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
torch_dtype = torch.bfloat16 if _bf16_ok else torch.float16

# MPS is always pinned to float16.
if device == "mps":
    torch_dtype = torch.float16

# Load the sketch T2I-Adapter in the selected dtype and move it to the device.
adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-sketch-sdxl-1.0", torch_dtype=torch_dtype, variant="fp16"
).to(device)

# Build the PhotoMaker pipeline on top of the base model, wiring in the adapter.
pipe = PhotoMakerStableDiffusionXLAdapterPipeline.from_pretrained(
    base_model_path,
    adapter=adapter,
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16",
).to(device)

# Attach the PhotoMaker V2 weights; "img" is the trigger word that
# generate_image() later requires exactly once in the prompt.
pipe.load_photomaker_adapter(
    "TencentARC/PhotoMaker-V2",
    subfolder="",
    weight_name="photomaker-v2.bin",
    trigger_word="img",
    pm_version="v2",
)
# Move the ID encoder to the device explicitly.
pipe.id_encoder.to(device)

# Replace the scheduler with an Euler discrete scheduler built from the
# pipeline's existing scheduler config.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# pipe.set_adapters(["photomaker"], adapter_weights=[1.0])
pipe.fuse_lora()
# NOTE(review): second .to(device) after loading/fusing - presumably to make
# sure modules added after the first move also live on the device; confirm.
pipe.to(device)


def generate_image(
        upload_images,
        prompt,
        negative_prompt,
        aspect_ratio_name,
        style_name,
        num_steps,
        style_strength_ratio,
        num_outputs,
        guidance_scale,
        seed,
        use_doodle,
        sketch_image,
        adapter_conditioning_scale,
        adapter_conditioning_factor,
        progress=gr.Progress(track_tqdm=True)
):
    """Run the PhotoMaker pipeline for the current UI inputs.

    Args:
        upload_images: Iterable of face images (anything ``load_image``
            accepts); ``None`` or empty is rejected.
        prompt: Text prompt; must contain the pipeline trigger word exactly once.
        negative_prompt: Negative prompt, appended to the style's own negative.
        aspect_ratio_name: Key into ``aspect_ratios`` giving ``(width, height)``.
        style_name: Style template name passed to ``apply_style``.
        num_steps: Number of inference steps.
        style_strength_ratio: Percentage of ``num_steps`` after which merging
            starts; the resulting step index is capped at 30.
        num_outputs: Number of images generated per prompt.
        guidance_scale: Guidance scale forwarded to the pipeline.
        seed: Seed for the torch generator.
        use_doodle: When truthy, the sketch canvas conditions the adapter.
        sketch_image: Sketchpad value; its ``"composite"`` entry is read when
            ``use_doodle`` is set, otherwise it is ignored.
        adapter_conditioning_scale: Adapter scale; forced to 0 without doodle.
        adapter_conditioning_factor: Adapter factor; forced to 0 without doodle.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        ``(images, update)`` where ``images`` is the pipeline output and
        ``update`` makes the usage-tips component visible.

    Raises:
        gr.Error: If the trigger word is missing or repeated, no image was
            uploaded, or no face is detected in any uploaded image.
    """
    if use_doodle:
        # Only the alpha channel of the composited canvas carries the strokes.
        _, _, _, alpha = sketch_image["composite"].split()
        # Binarise at 0.5 and convert back to a PIL image for the adapter.
        stroke_mask = TF.to_tensor(alpha.convert("RGB")) > 0.5
        sketch_image = TF.to_pil_image(stroke_mask.to(torch.float32))
    else:
        # Doodle disabled: neutralise the adapter entirely.
        adapter_conditioning_scale = 0.
        adapter_conditioning_factor = 0.
        sketch_image = None

    # check the trigger word
    image_token_id = pipe.tokenizer.convert_tokens_to_ids(pipe.trigger_word)
    input_ids = pipe.tokenizer.encode(prompt)
    if image_token_id not in input_ids:
        raise gr.Error(f"Cannot find the trigger word '{pipe.trigger_word}' in text prompt! Please refer to step 2️⃣")

    if input_ids.count(image_token_id) > 1:
        raise gr.Error(f"Cannot use multiple trigger words '{pipe.trigger_word}' in text prompt!")

    # determine output dimensions by the aspect ratio
    output_w, output_h = aspect_ratios[aspect_ratio_name]
    print(f"[Debug] Generate image using aspect ratio [{aspect_ratio_name}] => {output_w} x {output_h}")

    # apply the style template
    prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

    # An empty upload list is as unusable as a missing one, so report both the
    # same way instead of letting the empty case surface as "No face detected".
    if not upload_images:
        raise gr.Error("Cannot find any input face image! Please refer to step 1️⃣")

    input_id_images = [load_image(img) for img in upload_images]

    # One identity embedding per image in which a face is found (first face).
    id_embed_list = []
    for img in input_id_images:
        # Reverse the channel axis before handing the array to the analyser.
        bgr = np.array(img)[:, :, ::-1]
        faces = analyze_faces(face_detector, bgr)
        if len(faces) > 0:
            id_embed_list.append(torch.from_numpy((faces[0]['embedding'])))

    if len(id_embed_list) == 0:
        raise gr.Error("No face detected, please update the input face image(s)")

    id_embeds = torch.stack(id_embed_list)

    # manual_seed() needs a Python int; coerce in case the UI delivers a float.
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Seed: {seed}")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
    # Convert the percentage into a step index, capped at 30.
    start_merge_step = min(int(float(style_strength_ratio) / 100 * num_steps), 30)
    print(start_merge_step)
    images = pipe(
        prompt=prompt,
        width=output_w,
        height=output_h,
        input_id_images=input_id_images,
        negative_prompt=negative_prompt,
        num_images_per_prompt=num_outputs,
        num_inference_steps=num_steps,
        start_merge_step=start_merge_step,
        generator=generator,
        guidance_scale=guidance_scale,
        id_embeds=id_embeds,
        image=sketch_image,
        adapter_conditioning_scale=adapter_conditioning_scale,
        adapter_conditioning_factor=adapter_conditioning_factor,
    ).images
    return images, gr.update(visible=True)


def swap_to_gallery(images):
    """Switch from the file picker to the gallery preview.

    Returns three Gradio updates, in order: show the gallery filled with
    ``images``, show the clear-button container, hide the file widget.
    """
    show_gallery = gr.update(value=images, visible=True)
    show_clear_button = gr.update(visible=True)
    hide_file_picker = gr.update(visible=False)
    return show_gallery, show_clear_button, hide_file_picker


def upload_example_to_gallery(images, prompt, style, negative_prompt):
    """Show an example's images in the gallery and hide the file picker.

    ``prompt``, ``style`` and ``negative_prompt`` are accepted but not used;
    only ``images`` affects the returned updates (gallery, clear-button
    container, file widget - in that order).
    """
    show_gallery = gr.update(value=images, visible=True)
    show_clear_button = gr.update(visible=True)
    hide_file_picker = gr.update(visible=False)
    return show_gallery, show_clear_button, hide_file_picker


def remove_back_to_files():
    """Return to the upload view.

    Produces three Gradio updates, in order: hide the gallery, hide the
    clear-button container, show the file widget again.
    """
    hide_gallery = gr.update(visible=False)
    hide_clear_button = gr.update(visible=False)
    show_file_picker = gr.update(visible=True)
    return hide_gallery, hide_clear_button, show_file_picker


def change_doodle_space(use_doodle):
    """Return an update that shows the doodle panel iff ``use_doodle`` is truthy."""
    return gr.update(visible=True) if use_doodle else gr.update(visible=False)


def remove_tips():
    """Return a Gradio update that hides the component it is routed to."""
    hidden = gr.update(visible=False)
    return hidden


def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return ``seed`` unchanged, or a fresh random seed when requested.

    When ``randomize_seed`` is truthy the result is drawn uniformly from
    ``[0, MAX_SEED]`` (both ends inclusive).
    """
    if not randomize_seed:
        return seed
    return random.randint(0, MAX_SEED)


def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
    """Expand a style template into a (positive, negative) prompt pair.

    The template for ``style_name`` is looked up in ``styles``; unknown names
    fall back to ``DEFAULT_STYLE_NAME``. ``{prompt}`` in the positive template
    is replaced by ``positive``; ``negative`` is appended to the template's
    negative prompt after a single space.
    """
    fallback_template = styles[DEFAULT_STYLE_NAME]
    positive_template, negative_template = styles.get(style_name, fallback_template)
    styled_positive = positive_template.replace("{prompt}", positive)
    styled_negative = negative_template + ' ' + negative
    return styled_positive, styled_negative


def get_image_path_list(folder_name):
    """Return the sorted paths of every entry directly inside ``folder_name``."""
    entry_paths = []
    for entry in os.listdir(folder_name):
        entry_paths.append(os.path.join(folder_name, entry))
    entry_paths.sort()
    return entry_paths


def get_example():
    """Build the rows shown in the examples table.

    Each row is ``[image paths, prompt, style name, negative prompt]``.
    """
    no_style = "(No style)"
    shared_negative = "(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth"

    woman_row = [
        get_image_path_list('./examples/scarletthead_woman'),
        "instagram photo, portrait photo of a woman img, colorful, perfect face, natural skin, hard shadows, film grain",
        no_style,
        shared_negative,
    ]
    man_row = [
        get_image_path_list('./examples/newton_man'),
        "sci-fi, closeup portrait photo of a man img wearing the sunglasses in Iron man suit, face, slim body, high quality, film grain",
        no_style,
        shared_negative,
    ]
    return [woman_row, man_row]


### Description and style
# Static Markdown/HTML snippets rendered at the top of the page, plus the page CSS.
# The strings contain only the intended content: blame-view residue (gutter
# line numbers and commit-author labels) must not leak into them, because
# every line inside these literals is rendered to the user.
logo = r"""
<center><img src='https://photo-maker.github.io/assets/logo.png' alt='PhotoMaker logo' style="width:80px; margin-bottom:10px"></center>
"""
title = r"""
<h1 align="center">PhotoMaker V2:与PhotoMaker V1相比,改进了ID保真度和更好的可控性</h1>
"""

description = r"""
使用方法可参考🎬 <a href='https://photo-maker.github.io/assets/demo_pm_v2_full.mp4' target='_blank'>这个视频</a> 🎬.<br>
个性化步骤:<br>
1️⃣ 上传您想要自定义的某个人的图像。一张或多张图片都行,建议多张。此工具不进行人脸检测,上传图像中的人脸应该占据图像的大部分。<br>
2️⃣ 输入文本提示符,确保按照您想要自定义的类单词使用触发词: `img`, 例如: `man img` 、 `woman img` 或 `girl img`。<br>
3️⃣ 选择您喜欢的风格模板。<br>
4️⃣ <b>(可选:新特性)</b> 选择“启用绘图涂鸦...”选项,然后在画布上绘图<br>
5️⃣ 单击提交按钮开始自定义。
"""

tips = r"""
### 使用技巧
1. 上传多张要定制的人的照片,以**提高身份识别精度**。
2. 如果您发现使用涂鸦进行控制时图像质量很差,您可以减少`conditioning scale`和`factor of the adapter`。
"""
# We have provided some generate examples and comparisons at: [this website]().

css = '''
.gradio-container {width: 85% !important}
'''
# Build the Gradio UI. Left column: inputs and options; right column: results.
with gr.Blocks(css=css) as demo:
    gr.Markdown(logo)
    gr.Markdown(title)
    gr.Markdown(description)
    # gr.DuplicateButton(
    #     value="Duplicate Space for private use ",
    #     elem_id="duplicate-button",
    #     visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
    # )
    with gr.Row():
        with gr.Column():
            # Face image upload; replaced by a gallery preview once files arrive.
            files = gr.Files(
                label="上传/选择一张或多张人脸照片",
                file_types=["image"]
            )
            uploaded_files = gr.Gallery(label="你的图片", visible=False, columns=5, rows=1, height=200)
            with gr.Column(visible=False) as clear_button:
                remove_and_reupload = gr.ClearButton(value="移除并重新上传", components=files, size="sm")
            prompt = gr.Textbox(label="Prompt",
                                info="尝试类似'a photo of a man/woman img'的词, 'img'是触发词,必须包含",
                                placeholder="A photo of a [man/woman img]...")
            style = gr.Dropdown(label="风格", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
            aspect_ratio = gr.Dropdown(label="输出纵横比", choices=ASPECT_RATIO_LABELS,
                                       value=DEFAULT_ASPECT_RATIO)
            submit = gr.Button("提交")

            # Optional sketch control; the panel below stays hidden until enabled.
            enable_doodle = gr.Checkbox(
                label="启用绘图涂鸦进行控制", value=enable_doodle_arg,
                info="启用此选项后,PhotoMaker将通过T2I-Adapter根据您在画布上的涂鸦生成内容(质量可能会降低)",
            )
            with gr.Accordion("T2I-Adapter-涂鸦 (可选)", visible=False) as doodle_space:
                with gr.Row():
                    sketch_image = gr.Sketchpad(
                        label="Canvas",
                        type="pil",
                        crop_size=[1024, 1024],
                        layers=False,
                        canvas_size=(350, 350),
                        brush=gr.Brush(default_size=5, colors=["#000000"], color_mode="fixed")
                    )
                    with gr.Group():
                        adapter_conditioning_scale = gr.Slider(
                            label="Adapter conditioning scale",
                            minimum=0.5,
                            maximum=1,
                            step=0.1,
                            value=0.7,
                        )
                        adapter_conditioning_factor = gr.Slider(
                            label="Adapter conditioning factor",
                            info="Fraction of timesteps for which adapter should be applied",
                            minimum=0.5,
                            maximum=1,
                            step=0.1,
                            value=0.8,
                        )
            with gr.Accordion(open=False, label="高级选项"):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    placeholder="low quality",
                    value="nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry",
                )
                num_steps = gr.Slider(
                    label="Number of sample steps",
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=50,
                )
                style_strength_ratio = gr.Slider(
                    label="Style strength (%)",
                    minimum=15,
                    maximum=50,
                    step=1,
                    value=20,
                )
                num_outputs = gr.Slider(
                    label="Number of output images",
                    minimum=1,
                    maximum=4,
                    step=1,
                    value=2,
                )
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.1,
                    maximum=10.0,
                    step=0.1,
                    value=5,
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        with gr.Column():
            gallery = gr.Gallery(label="生成图片")
            usage_tips = gr.Markdown(label="使用技巧", value=tips, visible=False)

        # Toggle between the file picker and the gallery preview, and show or
        # hide the doodle panel together with its checkbox.
        files.upload(fn=swap_to_gallery, inputs=files, outputs=[uploaded_files, clear_button, files])
        remove_and_reupload.click(fn=remove_back_to_files, outputs=[uploaded_files, clear_button, files])
        enable_doodle.select(fn=change_doodle_space, inputs=enable_doodle, outputs=doodle_space)

        # Order must match the positional parameters of generate_image().
        input_list = [
            files,
            prompt,
            negative_prompt,
            aspect_ratio,
            style,
            num_steps,
            style_strength_ratio,
            num_outputs,
            guidance_scale,
            seed,
            enable_doodle,
            sketch_image,
            adapter_conditioning_scale,
            adapter_conditioning_factor
        ]

        # Submit chain: hide the tips, optionally re-roll the seed, then generate.
        submit.click(
            fn=remove_tips,
            outputs=usage_tips,
        ).then(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
            outputs=seed,
            queue=False,
            api_name=False,
        ).then(
            fn=generate_image,
            inputs=input_list,
            outputs=[gallery, usage_tips]
        )

    # Clickable examples; selecting one fills the inputs and shows its images.
    gr.Examples(
        examples=get_example(),
        inputs=[files, prompt, style, negative_prompt],
        run_on_click=True,
        fn=upload_example_to_gallery,
        outputs=[uploaded_files, clear_button, files],
    )

# Listen on all interfaces and request a public share link.
demo.launch(server_name='0.0.0.0', share=True)