feat: 改进 Dockerfile；将界面标题和使用提示改为中文，并支持生成公开分享链接。

00b2ff19 · chenpangpang · c90df435 · 00b2ff19 · 00b2ff19
Commit 00b2ff19 authored Jul 18, 2024 by chenpangpang
Hide whitespace changes
Inline Side-by-side

Showing with 99 additions and 93 deletions

Dockerfile +1 -1

InstantID/gradio_demo/app.py +98 -92

No files found.
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,7 +12,7 @@ RUN pip install -r gradio_demo/requirements.txt && \
 # Prod  #
 #########
 FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.2.0-python3.10-cuda12.1-ubuntu22.04
+RUN apt-get update && apt-get install -y libglib2.0-0 libgl1-mesa-glx
 COPY --from=base /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
 COPY --from=base /root/instantid/InstantID /root/InstantID
 COPY --from=base /root/instantid/启动器.ipynb /root/instantid/run.sh /root/
-# RUN apt-get update && apt-get install -y libglib2.0-0 libgl1-mesa-glx
\ No newline at end of file
--- a/InstantID/gradio_demo/app.py
+++ b/InstantID/gradio_demo/app.py
 import sys
+
 sys.path.append('./')

 from typing import Tuple
@@ -48,34 +49,34 @@ controlnet_path = f'./checkpoints/ControlNetModel'
 # Load pipeline
 controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

-def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):

+def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
    if pretrained_model_name_or_path.endswith(
            ".ckpt"
-        ) or pretrained_model_name_or_path.endswith(".safetensors"):
-            scheduler_kwargs = hf_hub_download(
-                repo_id="wangqixun/YamerMIX_v8",
-                subfolder="scheduler",
-                filename="scheduler_config.json",
-            )
+    ) or pretrained_model_name_or_path.endswith(".safetensors"):
+        scheduler_kwargs = hf_hub_download(
+            repo_id="wangqixun/YamerMIX_v8",
+            subfolder="scheduler",
+            filename="scheduler_config.json",
+        )

-            (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
-                pretrained_model_name_or_path=pretrained_model_name_or_path,
-                scheduler_name=None,
-                weight_dtype=dtype,
-            )
+        (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
+            pretrained_model_name_or_path=pretrained_model_name_or_path,
+            scheduler_name=None,
+            weight_dtype=dtype,
+        )

-            scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
-            pipe = StableDiffusionXLInstantIDPipeline(
-                vae=vae,
-                text_encoder=text_encoders[0],
-                text_encoder_2=text_encoders[1],
-                tokenizer=tokenizers[0],
-                tokenizer_2=tokenizers[1],
-                unet=unet,
-                scheduler=scheduler,
-                controlnet=controlnet,
-            ).to(device)
+        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
+        pipe = StableDiffusionXLInstantIDPipeline(
+            vae=vae,
+            text_encoder=text_encoders[0],
+            text_encoder_2=text_encoders[1],
+            tokenizer=tokenizers[0],
+            tokenizer_2=tokenizers[1],
+            unet=unet,
+            scheduler=scheduler,
+            controlnet=controlnet,
+        ).to(device)

    else:
        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
@@ -92,6 +93,7 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
    # load and disable LCM
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
    pipe.disable_lora()
+
    def toggle_lcm_ui(value):
        if value:
            return (
@@ -103,12 +105,12 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
                gr.update(minimum=5, maximum=100, step=1, value=30),
                gr.update(minimum=0.1, maximum=20.0, step=0.1, value=5)
            )
-    
+
    def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
        if randomize_seed:
            seed = random.randint(0, MAX_SEED)
        return seed
-    
+
    def remove_tips():
        return gr.update(visible=False)

@@ -156,7 +158,7 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
    def convert_from_image_to_cv2(img: Image) -> np.ndarray:
        return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

-    def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
+    def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
        stickwidth = 4
        limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
        kps = np.array(kps)
@@ -172,7 +174,8 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
            y = kps[index][:, 1]
            length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
-            polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+            polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0,
+                                       360, 1)
            out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
        out_img = (out_img * 0.6).astype(np.uint8)

@@ -184,78 +187,81 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
        out_img_pil = Image.fromarray(out_img.astype(np.uint8))
        return out_img_pil

-    def resize_img(input_image, max_side=1280, min_side=1024, size=None, 
-                pad_to_max_side=False, mode=PIL.Image.BILINEAR, base_pixel_number=64):
-
-            w, h = input_image.size
-            if size is not None:
-                w_resize_new, h_resize_new = size
-            else:
-                ratio = min_side / min(h, w)
-                w, h = round(ratio*w), round(ratio*h)
-                ratio = max_side / max(h, w)
-                input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
-                w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
-                h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
-            input_image = input_image.resize([w_resize_new, h_resize_new], mode)
-
-            if pad_to_max_side:
-                res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
-                offset_x = (max_side - w_resize_new) // 2
-                offset_y = (max_side - h_resize_new) // 2
-                res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
-                input_image = Image.fromarray(res)
-            return input_image
+    def resize_img(input_image, max_side=1280, min_side=1024, size=None,
+                   pad_to_max_side=False, mode=PIL.Image.BILINEAR, base_pixel_number=64):
+
+        w, h = input_image.size
+        if size is not None:
+            w_resize_new, h_resize_new = size
+        else:
+            ratio = min_side / min(h, w)
+            w, h = round(ratio * w), round(ratio * h)
+            ratio = max_side / max(h, w)
+            input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
+            w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
+            h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
+        input_image = input_image.resize([w_resize_new, h_resize_new], mode)
+
+        if pad_to_max_side:
+            res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
+            offset_x = (max_side - w_resize_new) // 2
+            offset_y = (max_side - h_resize_new) // 2
+            res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(input_image)
+            input_image = Image.fromarray(res)
+        return input_image

    def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
        p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
        return p.replace("{prompt}", positive), n + ' ' + negative

-    def generate_image(face_image_path, pose_image_path, prompt, negative_prompt, style_name, num_steps, identitynet_strength_ratio, adapter_strength_ratio, guidance_scale, seed, enable_LCM, enhance_face_region, progress=gr.Progress(track_tqdm=True)):
+    def generate_image(face_image_path, pose_image_path, prompt, negative_prompt, style_name, num_steps,
+                       identitynet_strength_ratio, adapter_strength_ratio, guidance_scale, seed, enable_LCM,
+                       enhance_face_region, progress=gr.Progress(track_tqdm=True)):
        if enable_LCM:
            pipe.enable_lora()
            pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
        else:
            pipe.disable_lora()
            pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
-    
+
        if face_image_path is None:
            raise gr.Error(f"Cannot find any input face image! Please upload the face image")
-        
+
        if prompt is None:
            prompt = "a person"
-        
+
        # apply the style template
        prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
-        
+
        face_image = load_image(face_image_path)
        face_image = resize_img(face_image)
        face_image_cv2 = convert_from_image_to_cv2(face_image)
        height, width, _ = face_image_cv2.shape
-        
+
        # Extract face features
        face_info = app.get(face_image_cv2)
-        
+
        if len(face_info) == 0:
            raise gr.Error(f"Cannot find any face in the image! Please upload another person image")
-        
-        face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1]  # only use the maximum face
+
+        face_info = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[
+            -1]  # only use the maximum face
        face_emb = face_info['embedding']
        face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])
-        
+
        if pose_image_path is not None:
            pose_image = load_image(pose_image_path)
            pose_image = resize_img(pose_image)
            pose_image_cv2 = convert_from_image_to_cv2(pose_image)
-            
+
            face_info = app.get(pose_image_cv2)
-            
+
            if len(face_info) == 0:
                raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")
-            
+
            face_info = face_info[-1]
            face_kps = draw_kps(pose_image, face_info['kps'])
-            
+
            width, height = face_kps.size

        if enhance_face_region:
@@ -266,12 +272,12 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
            control_mask = Image.fromarray(control_mask.astype(np.uint8))
        else:
            control_mask = None
-                        
+
        generator = torch.Generator(device=device).manual_seed(seed)
-        
+
        print("Start inference...")
        print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
-        
+
        pipe.set_ip_adapter_scale(adapter_strength_ratio)
        images = pipe(
            prompt=prompt,
@@ -291,18 +297,17 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F

    ### Description
    title = r"""
-    <h1 align="center">InstantID: Zero-shot Identity-Preserving Generation in Seconds</h1>
+    <h1 align="center">InstantID: 1张照片，无需训练，秒级生成个人写真</h1>
    """

    description = r"""
-    <b>Official 🤗 Gradio demo</b> for <a href='https://github.com/InstantID/InstantID' target='_blank'><b>InstantID: Zero-shot Identity-Preserving Generation in Seconds</b></a>.<br>
-
-    How to use:<br>
-    1. Upload an image with a face. For images with multiple faces, we will only detect the largest face. Ensure the face is not too small and is clearly visible without significant obstructions or blurring.
-    2. (Optional) You can upload another image as a reference for the face pose. If you don't, we will use the first detected face image to extract facial landmarks. If you use a cropped face at step 1, it is recommended to upload it to define a new face pose.
-    3. Enter a text prompt, as done in normal text-to-image models.
-    4. Click the <b>Submit</b> button to begin customization.
-    5. Share your customized photo with your friends and enjoy! 😊
+    <b>官方Gradio demo</b> for <a href='https://github.com/InstantID/InstantID' target='_blank'><b>InstantID: 1张照片，无需训练，秒级生成个人写真</b></a>.<br>
+    用户指南:<br>
+    1. 上传人物图片。 对于多人图像，我们只会检测最大的脸部。 确保脸部不要太小，并且没有明显遮挡或模糊。
+    2. （可选）上传另一个人的图像作为参考姿势。 如果没有上传，我们将使用第一张图像来提取姿势。 如果您在步骤1中使用了裁剪后的脸部，建议上传它以提取新的姿势。
+    3. （可选）输入文本prompt提示词，就像所有文生图应用中所做的那样.
+    4. 点击 <b>Submit</b> 按钮开始定制.
+    5. 分享美图给你的好友吧, enjoy😊!
    """

    article = r"""
@@ -342,27 +347,26 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F

        with gr.Row():
            with gr.Column():
-                
                # upload face image
                face_file = gr.Image(label="Upload a photo of your face", type="filepath")

                # optional: upload a reference pose image
                pose_file = gr.Image(label="Upload a reference pose image (optional)", type="filepath")
-           
+
                # prompt
                prompt = gr.Textbox(label="Prompt",
-                        info="Give simple prompt is enough to achieve good face fidelity",
-                        placeholder="A photo of a person",
-                        value="")
-                
+                                    info="Give simple prompt is enough to achieve good face fidelity",
+                                    placeholder="A photo of a person",
+                                    value="")
+
                submit = gr.Button("Submit", variant="primary")
-                
+
                enable_LCM = gr.Checkbox(
                    label="Enable Fast Inference with LCM", value=enable_lcm_arg,
                    info="LCM speeds up the inference step, the trade-off is the quality of the generated image. It performs better with portrait face images rather than distant faces",
                )
                style = gr.Dropdown(label="Style template", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
-                
+
                # strength
                identitynet_strength_ratio = gr.Slider(
                    label="IdentityNet strength (for fidelity)",
@@ -378,14 +382,14 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
                    step=0.05,
                    value=0.80,
                )
-                
+
                with gr.Accordion(open=False, label="Advanced Options"):
                    negative_prompt = gr.Textbox(
-                        label="Negative Prompt", 
+                        label="Negative Prompt",
                        placeholder="low quality",
                        value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
                    )
-                    num_steps = gr.Slider( 
+                    num_steps = gr.Slider(
                        label="Number of sample steps",
                        minimum=20,
                        maximum=100,
@@ -411,11 +415,11 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F

            with gr.Column():
                gallery = gr.Image(label="Generated Images")
-                usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
+                usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips, visible=False)

            submit.click(
                fn=remove_tips,
-                outputs=usage_tips,            
+                outputs=usage_tips,
            ).then(
                fn=randomize_seed_fn,
                inputs=[seed, randomize_seed],
@@ -424,10 +428,11 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
                api_name=False,
            ).then(
                fn=generate_image,
-                inputs=[face_file, pose_file, prompt, negative_prompt, style, num_steps, identitynet_strength_ratio, adapter_strength_ratio, guidance_scale, seed, enable_LCM, enhance_face_region],
+                inputs=[face_file, pose_file, prompt, negative_prompt, style, num_steps, identitynet_strength_ratio,
+                        adapter_strength_ratio, guidance_scale, seed, enable_LCM, enhance_face_region],
                outputs=[gallery, usage_tips]
            )
-        
+
            enable_LCM.input(fn=toggle_lcm_ui, inputs=[enable_LCM], outputs=[num_steps, guidance_scale], queue=False)

        gr.Examples(
@@ -438,10 +443,11 @@ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=F
            outputs=[gallery, usage_tips],
            cache_examples=True,
        )
-        
+
        gr.Markdown(article)

-    demo.launch(server_name='0.0.0.0')
+    demo.launch(server_name='0.0.0.0', share=True)
+

 if __name__ == "__main__":
    parser = argparse.ArgumentParser()