feat: 修复bug; 下载模型先更新huggingface-hub

32898938 · chenpangpang · 0a1fba1c · 32898938 · 32898938 · 32898938
Commit 32898938 authored Aug 21, 2024 by chenpangpang
Hide whitespace changes
Inline Side-by-side

Showing with 63 additions and 54 deletions

Dockerfile Dockerfile +2 -0

Kolors-FaceID/app.py Kolors-FaceID/app.py +59 -54

hf_down.py hf_down.py +2 -0

No files found.
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,6 +4,7 @@ ARG IMAGE_UPPER=Kolors-FaceID
 ARG BRANCH=gpu
 RUN cd /root && git clone -b $BRANCH http://developer.hpccube.com/codes/chenpangpang/$IMAGE.git
 WORKDIR /root/$IMAGE/$IMAGE_UPPER
+RUN apt-get update && apt-get install -y gcc g++
 RUN pip install -r requirements.txt && \
    pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

@@ -17,6 +18,7 @@ COPY chenyh/$IMAGE/frpc_linux_amd64_v0.2 /opt/conda/lib/python3.10/site-packages
 RUN chmod +x /opt/conda/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.2
 COPY chenyh/$IMAGE/Kwai-Kolors/Kolors /root/$IMAGE_UPPER/Kwai-Kolors/Kolors
 COPY chenyh/$IMAGE/Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus /root/$IMAGE_UPPER/Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus
+RUN apt-get update && apt install -y libgl1-mesa-glx libglib2.0-dev
 COPY --from=base /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
 COPY --from=base /root/$IMAGE/$IMAGE_UPPER /root/$IMAGE_UPPER
 COPY --from=base /root/$IMAGE/启动器.ipynb /root/$IMAGE/start.sh /root/
\ No newline at end of file
--- a/Kolors-FaceID/app.py
+++ b/Kolors-FaceID/app.py
-import spaces
 import random
 import torch
 import cv2
@@ -7,7 +6,7 @@ import gradio as gr
 import numpy as np
 import os
 from huggingface_hub import snapshot_download
-from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
+from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
 from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter_FaceID import StableDiffusionXLPipeline
 from kolors.models.modeling_chatglm import ChatGLMModel
 from kolors.models.tokenization_chatglm import ChatGLMTokenizer
@@ -18,7 +17,6 @@ from PIL import Image
 from insightface.app import FaceAnalysis
 from insightface.data import get_image as ins_get_image

-
 device = "cuda"
 ckpt_dir = "Kwai-Kolors/Kolors"
 ckpt_dir_faceid = "Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus"
@@ -28,25 +26,28 @@ tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
 vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
 scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
 unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
-clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_dir_faceid}/clip-vit-large-patch14-336', ignore_mismatched_sizes=True)
+clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_dir_faceid}/clip-vit-large-patch14-336',
+                                                                   ignore_mismatched_sizes=True)
 clip_image_encoder.to(device)
-clip_image_processor = CLIPImageProcessor(size = 336, crop_size = 336)
+clip_image_processor = CLIPImageProcessor(size=336, crop_size=336)

 pipe = StableDiffusionXLPipeline(
-    vae = vae,
-    text_encoder = text_encoder,
-    tokenizer = tokenizer,
-    unet = unet,
-    scheduler = scheduler,
-    face_clip_encoder = clip_image_encoder,
-    face_clip_processor = clip_image_processor,
-    force_zeros_for_empty_prompt = False,
+    vae=vae,
+    text_encoder=text_encoder,
+    tokenizer=tokenizer,
+    unet=unet,
+    scheduler=scheduler,
+    face_clip_encoder=clip_image_encoder,
+    face_clip_processor=clip_image_processor,
+    force_zeros_for_empty_prompt=False,
 )

+
 class FaceInfoGenerator():
-    def __init__(self, root_dir = "./.insightface/"):
-        self.app = FaceAnalysis(name = 'antelopev2', root = root_dir, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
-        self.app.prepare(ctx_id = 0, det_size = (640, 640))
+    def __init__(self, root_dir="./.insightface/"):
+        self.app = FaceAnalysis(name='antelopev2', root=root_dir,
+                                providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+        self.app.prepare(ctx_id=0, det_size=(640, 640))

    def get_faceinfo_one_img(self, face_image):
        face_info = self.app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
@@ -54,12 +55,14 @@ class FaceInfoGenerator():
        if len(face_info) == 0:
            face_info = None
        else:
-            face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1]  # only use the maximum face
+            face_info = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[
+                -1]  # only use the maximum face
        return face_info

+
 def face_bbox_to_square(bbox):
    ## l, t, r, b to square l, t, r, b
-    l,t,r,b = bbox
+    l, t, r, b = bbox
    cent_x = (l + r) / 2
    cent_y = (t + b) / 2
    w, h = r - l, b - t
@@ -72,27 +75,28 @@ def face_bbox_to_square(bbox):

    return [l0, t0, r0, b0]

+
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 face_info_generator = FaceInfoGenerator()

-@spaces.GPU
-def infer(prompt, 
-          image = None, 
-          negative_prompt = "nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯", 
-          seed = 66, 
-          randomize_seed = False,
-          guidance_scale = 5.0, 
-          num_inference_steps = 50
-        ):
+
+def infer(prompt,
+          image=None,
+          negative_prompt="nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯",
+          seed=66,
+          randomize_seed=False,
+          guidance_scale=5.0,
+          num_inference_steps=50
+          ):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)
    global pipe
    pipe = pipe.to(device)
-    pipe.load_ip_adapter_faceid_plus(f'{ckpt_dir_faceid}/ipa-faceid-plus.bin', device = device)
+    pipe.load_ip_adapter_faceid_plus(f'{ckpt_dir_faceid}/ipa-faceid-plus.bin', device=device)
    scale = 0.8
-    pipe.set_face_fidelity_scale(scale)   
+    pipe.set_face_fidelity_scale(scale)

    face_info = face_info_generator.get_faceinfo_one_img(image)
    face_bbox_square = face_bbox_to_square(face_info["bbox"])
@@ -100,19 +104,19 @@ def infer(prompt,
    crop_image = crop_image.resize((336, 336))
    crop_image = [crop_image]
    face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
-    face_embeds = face_embeds.to(device, dtype = torch.float16)
+    face_embeds = face_embeds.to(device, dtype=torch.float16)

    image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt, 
-        height = 1024,
-        width = 1024,
-        num_inference_steps= num_inference_steps, 
-        guidance_scale = guidance_scale,
-        num_images_per_prompt = 1,
-        generator = generator,
-        face_crop_image = crop_image,
-        face_insightface_embeds = face_embeds
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        height=1024,
+        width=1024,
+        num_inference_steps=num_inference_steps,
+        guidance_scale=guidance_scale,
+        num_images_per_prompt=1,
+        generator=generator,
+        face_crop_image=crop_image,
+        face_insightface_embeds=face_embeds
    ).images[0]

    return image, seed
@@ -120,11 +124,11 @@ def infer(prompt,

 examples = [
    ["穿着晚礼服，在星光下的晚宴场景中，烛光闪闪，整个场景洋溢着浪漫而奢华的氛围", "image/image1.png"],
-    ["西部牛仔，牛仔帽，荒野大镖客，背景是西部小镇，仙人掌，,日落余晖, 暖色调, 使用XT4胶片拍摄, 噪点, 晕影, 柯达胶卷，复古", "image/image2.png"]
+    ["西部牛仔，牛仔帽，荒野大镖客，背景是西部小镇，仙人掌，,日落余晖, 暖色调, 使用XT4胶片拍摄, 噪点, 晕影, 柯达胶卷，复古",
+     "image/image2.png"]
 ]

-
-css="""
+css = """
 #col-left {
    margin: 0 auto;
    max-width: 600px;
@@ -138,11 +142,13 @@ css="""
 }
 """

+
 def load_description(fp):
    """Return the complete text of the file at ``fp``, decoded as UTF-8.

    The file is opened in text mode and closed again before the function
    returns; any error raised by ``open`` or ``read`` propagates to the caller.
    """
    with open(fp, mode='r', encoding='utf-8') as handle:
        return handle.read()

+
 with gr.Blocks(css=css) as Kolors:
    gr.HTML(load_description("assets/title.md"))
    with gr.Row():
@@ -186,24 +192,23 @@ with gr.Blocks(css=css) as Kolors:
                    )
            with gr.Row():
                button = gr.Button("Run", elem_id="button")
-            
+
        with gr.Column(elem_id="col-right"):
            result = gr.Image(label="Result", show_label=False)
            seed_used = gr.Number(label="Seed Used")
-    
+
    with gr.Row():
        gr.Examples(
-                fn = infer,
-                examples = examples,
-                inputs = [prompt, image],
-                outputs = [result, seed_used],
-            )
+            fn=infer,
+            examples=examples,
+            inputs=[prompt, image],
+            outputs=[result, seed_used],
+        )

    button.click(
-        fn = infer,
-        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps],
-        outputs = [result, seed_used]
+        fn=infer,
+        inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps],
+        outputs=[result, seed_used]
    )

-
 Kolors.queue().launch(server_name="0.0.0.0", share=True)
--- a/hf_down.py
+++ b/hf_down.py
@@ -8,6 +8,8 @@ model_list = [
    "Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus"
 ]

+os.system("pip install -U huggingface-hub")
+
 # Fetch every repository named in model_list by shelling out to
 # huggingface-cli once per entry; each one is written to a local directory
 # with the same relative path as the repository id (./<model_path>).
 # NOTE(review): the exit status returned by os.system is discarded, so a
 # failed download does not stop the loop — confirm this is intended.
 for model_path in model_list:
    os.system(
        f"huggingface-cli download --resume-download  {model_path} --local-dir ./{model_path} --local-dir-use-symlinks False")