"templates/vscode:/vscode.git/clone" did not exist on "901507335f6ed59cad6bbbc2b5d8d9eba8a1b4e1"
Commit aad7b6c7 authored by chenzk's avatar chenzk
Browse files

v1.0

parents
Pipeline #2416 canceled with stages
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import torch
from diffusers import EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionControlNetPipeline, StableDiffusionXLControlNetImg2ImgPipeline, \
    ControlNetModel, AutoencoderKL


class Img2img_Control_Ip_adapter:
    def __init__(self, device):
        controlnet = ControlNetModel.from_pretrained(
            'lllyasviel/control_v11f1p_sd15_depth',
            torch_dtype=torch.float16, variant="fp16", use_safetensors=True
        )
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            'runwayml/stable-diffusion-v1-5', controlnet=controlnet, torch_dtype=torch.float16, use_safetensors=True
        )
        pipe.load_ip_adapter('h94/IP-Adapter', subfolder="models", weight_name="ip-adapter-plus_sd15.safetensors")
        pipe.set_ip_adapter_scale(0.7)
        pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
        # pipe.enable_model_cpu_offload()
        self.pipe = pipe.to(device)

    def __call__(
            self,
            prompt,
            control_image,
            ip_adapter_image,
            negative_prompt,
            height=512,
            width=512,
            num_inference_steps=20,
            guidance_scale=8.0,
            controlnet_conditioning_scale=1.0,
            output_type="pil",
            **kwargs,
    ):
        # Seeding is handled via the generator; the original code also passed
        # redundant `seed=42` and `strength=1` kwargs, which this text-to-image
        # ControlNet pipeline does not use.
        results = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=control_image,
            ip_adapter_image=ip_adapter_image,
            generator=torch.manual_seed(42),
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            # clip_skip=2,
            height=height,
            width=width,
            output_type=output_type,
            **kwargs,
        ).images[0]
        return results
################################################################
class HesModel:
    def __init__(self):
        controlnet_depth = ControlNetModel.from_pretrained(
            'diffusers/controlnet-depth-sdxl-1.0',
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True
        )
        self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
            'stabilityai/stable-diffusion-xl-base-1.0',
            torch_dtype=torch.float16,
            variant="fp16",
            controlnet=controlnet_depth,
            use_safetensors=True,
        )
        # Swap in the numerically stable fp16 VAE.
        self.pipe.vae = AutoencoderKL.from_pretrained(
            'madebyollin/sdxl-vae-fp16-fix',
            torch_dtype=torch.float16
        )
        self.pipe.load_ip_adapter('h94/IP-Adapter', subfolder="sdxl_models", weight_name="ip-adapter_sdxl.safetensors")
        self.pipe.set_ip_adapter_scale(0.7)
        self.pipe.to("cuda")

    def __call__(self,
                 init_image,
                 control_image,
                 ip_adapter_image=None,
                 prompt='3D image',
                 negative_prompt='2D image',
                 seed=42,
                 strength=0.8,
                 num_inference_steps=40,
                 guidance_scale=7.5,
                 controlnet_conditioning_scale=0.5,
                 **kwargs
                 ):
        image = self.pipe(
            prompt=prompt,
            image=init_image,
            control_image=control_image,
            ip_adapter_image=ip_adapter_image,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            strength=strength,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            # diffusers pipelines take a torch.Generator, not a raw seed kwarg.
            generator=torch.manual_seed(seed),
            **kwargs
        ).images[0]
        return image
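

if __name__ == '__main__':
    # Usage sketch for the SD1.5 wrapper above, assuming hypothetical input
    # files 'depth.png' (a depth control map) and 'ref.png' (an IP-Adapter
    # reference image); adjust paths and prompts to your data.
    from PIL import Image

    depth_image = Image.open('depth.png').convert('RGB')
    ref_image = Image.open('ref.png').convert('RGB')

    model = Img2img_Control_Ip_adapter(device='cuda')
    result = model(
        prompt='a 3D render of a chair, white background',
        control_image=depth_image,
        ip_adapter_image=ref_image,
        negative_prompt='blurry, low quality',
    )
    result.save('controlled_render.png')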
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
class RunningStats:
    def __init__(self) -> None:
        self.count = 0
        self.sum = 0
        self.mean = 0
        self.min = None
        self.max = None

    def add_value(self, value):
        self.count += 1
        self.sum += value
        self.mean = self.sum / self.count
        if self.min is None or value < self.min:
            self.min = value
        if self.max is None or value > self.max:
            self.max = value

    def get_count(self):
        return self.count

    def get_sum(self):
        return self.sum

    def get_mean(self):
        return self.mean

    def get_min(self):
        return self.min

    def get_max(self):
        return self.max
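

if __name__ == '__main__':
    # Quick sanity check of RunningStats (illustrative only).
    stats = RunningStats()
    for v in [3.0, 1.0, 4.0, 1.0, 5.0]:
        stats.add_value(v)
    assert stats.get_count() == 5
    assert stats.get_min() == 1.0
    assert stats.get_max() == 5.0
    assert abs(stats.get_mean() - 2.8) < 1e-9  # (3 + 1 + 4 + 1 + 5) / 5 = 2.8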
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
class Light_Shadow_Remover:
    def __init__(self, config):
        self.device = config.device
        self.cfg_image = 1.5
        self.cfg_text = 1.0

        pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
            config.light_remover_ckpt_path,
            torch_dtype=torch.float16,
            safety_checker=None,
        )
        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
        pipeline.set_progress_bar_config(disable=True)

        self.pipeline = pipeline.to(self.device, torch.float16)

    def recorrect_rgb(self, src_image, target_image, alpha_channel, scale=0.95):
        # Per-channel mean/std color transfer, restricted to pixels where the
        # alpha mask is on; falls back to the source image if the correction
        # does not bring it closer to the target.
        def flat_and_mask(bgr, a):
            mask = torch.where(a > 0.5, True, False)
            bgr_flat = bgr.reshape(-1, bgr.shape[-1])
            mask_flat = mask.reshape(-1)
            bgr_flat_masked = bgr_flat[mask_flat, :]
            return bgr_flat_masked

        src_flat = flat_and_mask(src_image, alpha_channel)
        target_flat = flat_and_mask(target_image, alpha_channel)
        corrected_bgr = torch.zeros_like(src_image)
        for i in range(3):
            src_mean, src_stddev = torch.mean(src_flat[:, i]), torch.std(src_flat[:, i])
            target_mean, target_stddev = torch.mean(target_flat[:, i]), torch.std(target_flat[:, i])
            corrected_bgr[:, :, i] = torch.clamp(
                (src_image[:, :, i] - scale * src_mean) * (target_stddev / src_stddev) + scale * target_mean, 0, 1)

        src_mse = torch.mean((src_image - target_image) ** 2)
        modify_mse = torch.mean((corrected_bgr - target_image) ** 2)
        if src_mse < modify_mse:
            corrected_bgr = torch.cat([src_image, alpha_channel], dim=-1)
        else:
            corrected_bgr = torch.cat([corrected_bgr, alpha_channel], dim=-1)

        return corrected_bgr

    @torch.no_grad()
    def __call__(self, image):
        image = image.resize((512, 512))

        if image.mode == 'RGBA':
            image_array = np.array(image)
            alpha_channel = image_array[:, :, 3]
            # Erode the alpha mask slightly so edge pixels do not leak color.
            erosion_size = 3
            kernel = np.ones((erosion_size, erosion_size), np.uint8)
            alpha_channel = cv2.erode(alpha_channel, kernel, iterations=1)
            image_array[alpha_channel == 0, :3] = 255
            image_array[:, :, 3] = alpha_channel
            image = Image.fromarray(image_array)

            image_tensor = torch.tensor(np.array(image) / 255.0).to(self.device)
            alpha = image_tensor[:, :, 3:]
            rgb_target = image_tensor[:, :, :3]
        else:
            image_tensor = torch.tensor(np.array(image) / 255.0).to(self.device)
            alpha = torch.ones_like(image_tensor)[:, :, :1]
            rgb_target = image_tensor[:, :, :3]

        image = image.convert('RGB')

        image = self.pipeline(
            prompt="",
            image=image,
            generator=torch.manual_seed(42),
            height=512,
            width=512,
            num_inference_steps=50,
            image_guidance_scale=self.cfg_image,
            guidance_scale=self.cfg_text,
        ).images[0]

        image_tensor = torch.tensor(np.array(image) / 255.0).to(self.device)
        rgb_src = image_tensor[:, :, :3]
        image = self.recorrect_rgb(rgb_src, rgb_target, alpha)
        # Composite onto a white background using the alpha channel.
        image = image[:, :, :3] * image[:, :, 3:] + torch.ones_like(image[:, :, :3]) * (1.0 - image[:, :, 3:])
        image = Image.fromarray((image.cpu().numpy() * 255).astype(np.uint8))

        return image
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import torch
from diffusers import StableDiffusionUpscalePipeline
class Image_Super_Net:
    def __init__(self, config):
        self.up_pipeline_x4 = StableDiffusionUpscalePipeline.from_pretrained(
            'stabilityai/stable-diffusion-x4-upscaler',
            torch_dtype=torch.float16,
        ).to(config.device)
        self.up_pipeline_x4.set_progress_bar_config(disable=True)

    def __call__(self, image, prompt=''):
        with torch.no_grad():
            upscaled_image = self.up_pipeline_x4(
                prompt=[prompt],
                image=image,
                num_inference_steps=5,
            ).images[0]
        return upscaled_image
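

if __name__ == '__main__':
    # Usage sketch; assumes a minimal stand-in config exposing only the
    # `device` attribute this class reads, and a hypothetical input file.
    from types import SimpleNamespace
    from PIL import Image

    super_net = Image_Super_Net(SimpleNamespace(device='cuda'))
    low_res = Image.open('texture_512.png').convert('RGB')
    high_res = super_net(low_res, prompt='high quality texture')
    high_res.save('texture_2048.png')  # x4 upscaler: 512 -> 2048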
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import os
import random
import numpy as np
import torch
from diffusers import DiffusionPipeline
from diffusers import EulerAncestralDiscreteScheduler
class Multiview_Diffusion_Net:
    def __init__(self, config) -> None:
        self.device = config.device
        self.view_size = 512
        multiview_ckpt_path = config.multiview_ckpt_path

        current_file_path = os.path.abspath(__file__)
        custom_pipeline_path = os.path.join(os.path.dirname(current_file_path), '..', 'hunyuanpaint')

        pipeline = DiffusionPipeline.from_pretrained(
            multiview_ckpt_path,
            custom_pipeline=custom_pipeline_path,
            torch_dtype=torch.float16)

        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
            pipeline.scheduler.config, timestep_spacing='trailing')
        pipeline.set_progress_bar_config(disable=True)
        self.pipeline = pipeline.to(self.device)

    def seed_everything(self, seed):
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PL_GLOBAL_SEED"] = str(seed)

    def __call__(self, input_image, control_images, camera_info):
        self.seed_everything(0)

        input_image = input_image.resize((self.view_size, self.view_size))
        for i in range(len(control_images)):
            control_images[i] = control_images[i].resize((self.view_size, self.view_size))
            if control_images[i].mode == 'L':
                # Binarize grayscale control maps.
                control_images[i] = control_images[i].point(lambda x: 255 if x > 1 else 0, mode='1')

        kwargs = dict(generator=torch.Generator(device=self.pipeline.device).manual_seed(0))

        # The first half of control_images are normal maps, the second half position maps.
        num_view = len(control_images) // 2
        normal_image = [[control_images[i] for i in range(num_view)]]
        position_image = [[control_images[i + num_view] for i in range(num_view)]]

        camera_info_gen = [camera_info]
        camera_info_ref = [[0]]
        kwargs['width'] = self.view_size
        kwargs['height'] = self.view_size
        kwargs['num_in_batch'] = num_view
        kwargs['camera_info_gen'] = camera_info_gen
        kwargs['camera_info_ref'] = camera_info_ref
        kwargs["normal_imgs"] = normal_image
        kwargs["position_imgs"] = position_image

        mvd_image = self.pipeline(input_image, num_inference_steps=30, **kwargs).images
        return mvd_image
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import trimesh
def remesh_mesh(mesh_path, remesh_path, method='trimesh'):
    if method == 'trimesh':
        mesh_simplify_trimesh(mesh_path, remesh_path)
    else:
        # Raising a string is invalid in Python 3; raise a proper exception.
        raise NotImplementedError(f'Method {method} has not been implemented.')


def mesh_simplify_trimesh(inputpath, outputpath):
    import pymeshlab
    # Flatten the input into a single layer and round-trip through OBJ so
    # trimesh loads a plain, texture-free mesh.
    ms = pymeshlab.MeshSet()
    ms.load_new_mesh(inputpath, load_in_a_single_layer=True)
    ms.save_current_mesh(outputpath.replace('.glb', '.obj'), save_textures=False)

    mesh = trimesh.load(outputpath.replace('.glb', '.obj'), force='mesh')
    face_num = mesh.faces.shape[0]
    if face_num > 100000:
        mesh = mesh.simplify_quadric_decimation(40000)
    mesh.export(outputpath)
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import trimesh
import xatlas
def mesh_uv_wrap(mesh):
    if isinstance(mesh, trimesh.Scene):
        mesh = mesh.dump(concatenate=True)

    if len(mesh.faces) > 50000:
        raise ValueError("The mesh has more than 50,000 faces, which is not supported.")

    # xatlas generates a UV atlas: vmapping maps new vertices back to the
    # original vertex array, indices are the re-triangulated faces, and uvs
    # are per-vertex texture coordinates.
    vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)

    mesh.vertices = mesh.vertices[vmapping]
    mesh.faces = indices
    mesh.visual.uv = uvs

    return mesh
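

if __name__ == '__main__':
    # Usage sketch with a hypothetical input file 'mesh.glb'.
    m = trimesh.load('mesh.glb')  # may load as a Scene; mesh_uv_wrap handles it
    m = mesh_uv_wrap(m)
    print(m.visual.uv.shape)      # (num_vertices, 2) UV coordinates
    m.export('mesh_with_uv.obj')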
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import os
import random
import numpy as np
import torch
from diffusers import AutoPipelineForText2Image
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ["PL_GLOBAL_SEED"] = str(seed)


class HunyuanDiTPipeline:
    def __init__(
        self,
        model_path="Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled",
        device='cuda'
    ):
        self.device = device
        self.pipe = AutoPipelineForText2Image.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            enable_pag=True,
            pag_applied_layers=["blocks.(16|17|18|19)"]
        ).to(device)
        # Positive suffix: "white background, 3D style, best quality".
        self.pos_txt = ",白色背景,3D风格,最佳质量"
        # Negative prompt: "text, close-up, cropped, out of frame, worst quality,
        # low quality, JPEG artifacts, PGLY, duplicate, morbid, mutilated, extra
        # fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation,
        # deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs,
        # cloned face, disfigured, gross proportions, malformed limbs, missing
        # arms, missing legs, extra arms, extra legs, fused fingers, too many
        # fingers, long neck".
        self.neg_txt = "文本,特写,裁剪,出框,最差质量,低质量,JPEG伪影,PGLY,重复,病态," \
                       "残缺,多余的手指,变异的手,画得不好的手,画得不好的脸,变异,畸形,模糊,脱水,糟糕的解剖学," \
                       "糟糕的比例,多余的肢体,克隆的脸,毁容,恶心的比例,畸形的肢体,缺失的手臂,缺失的腿," \
                       "额外的手臂,额外的腿,融合的手指,手指太多,长脖子"

    def compile(self):
        # Compile the HunyuanDiT transformer to accelerate inference; the first
        # call after compiling takes a long time to warm up.
        torch.set_float32_matmul_precision('high')
        self.pipe.transformer = torch.compile(self.pipe.transformer, fullgraph=True)
        # self.pipe.vae.decode = torch.compile(self.pipe.vae.decode, fullgraph=True)
        # Run one inference for a hot start (prompt: "Sailor Moon", negative: "blurry").
        generator = torch.Generator(device=self.pipe.device)
        out_img = self.pipe(
            prompt='美少女战士',
            negative_prompt='模糊',
            num_inference_steps=25,
            pag_scale=1.3,
            width=1024,
            height=1024,
            generator=generator,
            return_dict=False
        )[0][0]

    @torch.no_grad()
    def __call__(self, prompt, seed=0):
        seed_everything(seed)
        generator = torch.Generator(device=self.pipe.device)
        generator = generator.manual_seed(int(seed))
        out_img = self.pipe(
            prompt=prompt[:60] + self.pos_txt,
            negative_prompt=self.neg_txt,
            num_inference_steps=25,
            pag_scale=1.3,
            width=1024,
            height=1024,
            generator=generator,
            return_dict=False
        )[0][0]
        return out_img
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import torch
from PIL import Image
from hy3dgen.rembg import BackgroundRemover
from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline, FaceReducer, FloaterRemover, DegenerateFaceRemover
from hy3dgen.text2image import HunyuanDiTPipeline
def image_to_3d(image_path='assets/demo.png'):
    rembg = BackgroundRemover()
    model_path = 'tencent/Hunyuan3D-2'

    image = Image.open(image_path)
    if image.mode == 'RGB':
        image = rembg(image)

    pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(model_path)
    mesh = pipeline(image=image, num_inference_steps=30, mc_algo='mc',
                    generator=torch.manual_seed(2025))[0]
    mesh = FloaterRemover()(mesh)
    mesh = DegenerateFaceRemover()(mesh)
    mesh = FaceReducer()(mesh)
    mesh.export('mesh.glb')

    try:
        from hy3dgen.texgen import Hunyuan3DPaintPipeline
        pipeline = Hunyuan3DPaintPipeline.from_pretrained(model_path)
        mesh = pipeline(mesh, image=image)
        mesh.export('texture.glb')
    except Exception as e:
        print(e)
        print('Please try to install requirements by following README.md')


def text_to_3d(prompt='a car'):
    rembg = BackgroundRemover()
    t2i = HunyuanDiTPipeline('Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled')
    model_path = 'tencent/Hunyuan3D-2'
    i23d = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(model_path)

    image = t2i(prompt)
    image = rembg(image)
    mesh = i23d(image, num_inference_steps=30, mc_algo='mc')[0]
    mesh = FloaterRemover()(mesh)
    mesh = DegenerateFaceRemover()(mesh)
    mesh = FaceReducer()(mesh)
    mesh.export('t2i_demo.glb')


def image_to_3d_fast(image_path='assets/demo.png'):
    rembg = BackgroundRemover()
    model_path = 'tencent/Hunyuan3D-2'

    image = Image.open(image_path)
    if image.mode == 'RGB':
        image = rembg(image)

    pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
        model_path,
        subfolder='hunyuan3d-dit-v2-0-fast',
        variant='fp16'
    )
    mesh = pipeline(image=image, num_inference_steps=30, mc_algo='mc',
                    generator=torch.manual_seed(2025))[0]
    mesh = FloaterRemover()(mesh)
    mesh = DegenerateFaceRemover()(mesh)
    mesh = FaceReducer()(mesh)
    mesh.export('mesh.glb')


if __name__ == '__main__':
    image_to_3d_fast()
    # image_to_3d()
    # text_to_3d()
# Model code
modelCode=1421
# Model name
modelName=Hunyuan3D-2_pytorch
# Model description
modelDescription=Tencent's Hunyuan3D-2 generates high-fidelity 3D models with high-resolution texture maps.
# Application scenarios
appScenario=inference,3D generation,anime,broadcast media,film and television,manufacturing,medical,home furnishing,education
# Framework type
frameType=pytorch
ninja
pybind11
diffusers
einops
opencv-python
numpy
torch
transformers
torchvision
#taming-transformers-rom1504
#ConfigArgParse
#ipdb
omegaconf
scikit-image
rembg
onnxruntime
numba==0.58.0
numpy==1.25.0
transformers==4.49.0
#sentencepiece
tqdm
# Mesh Processing
trimesh
pymeshlab
pygltflib
xatlas
#kornia
#facexlib
# Training
accelerate
#pytorch_lightning
#scikit-learn
#scikit-image
# Demo only
gradio
fastapi
uvicorn
rembg
onnxruntime
#gevent
#geventhttpclient
# Open Source Model Licensed under the Apache License Version 2.0
# and Other Licenses of the Third-Party Components therein:
# The below Model in this distribution may have been modified by THL A29 Limited
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
# The below software and/or models in this distribution may have been
# modified by THL A29 Limited ("Tencent Modifications").
# All Tencent Modifications are Copyright (C) THL A29 Limited.
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
from setuptools import setup, find_packages
setup(
    name="hy3dgen",
    version="2.0.0",
    packages=find_packages(),
)
---
license: openrail++
tags:
- stable-diffusion
inference: false
---
# Stable Diffusion x4 upscaler model card
This model card focuses on the model associated with the Stable Diffusion Upscaler, available [here](https://github.com/Stability-AI/stablediffusion).
This model is trained for 1.25M steps on a 10M subset of LAION containing images `>2048x2048`. The model was trained on crops of size `512x512` and is a text-guided [latent upscaling diffusion model](https://arxiv.org/abs/2112.10752).
In addition to the textual input, it receives a `noise_level` as an input parameter, which can be used to add noise to the low-resolution input according to a [predefined diffusion schedule](configs/stable-diffusion/x4-upscaling.yaml).
![Image](https://github.com/Stability-AI/stablediffusion/raw/main/assets/stable-samples/upscaling/merged-dog.png)
- Use it with the [`stablediffusion`](https://github.com/Stability-AI/stablediffusion) repository: download the `x4-upscaler-ema.ckpt` [here](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler/resolve/main/x4-upscaler-ema.ckpt).
- Use it with 🧨 [`diffusers`](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler#examples)
## Model Details
- **Developed by:** Robin Rombach, Patrick Esser
- **Model type:** Diffusion-based text-to-image generation model
- **Language(s):** English
- **License:** [CreativeML Open RAIL++-M License](https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL)
- **Model Description:** This is a model that can be used to generate and modify images based on text prompts. It is a [Latent Diffusion Model](https://arxiv.org/abs/2112.10752) that uses a fixed, pretrained text encoder ([OpenCLIP-ViT/H](https://github.com/mlfoundations/open_clip)).
- **Resources for more information:** [GitHub Repository](https://github.com/Stability-AI/).
- **Cite as:**
```bibtex
@InProceedings{Rombach_2022_CVPR,
    author    = {Rombach, Robin and Blattmann, Andreas and Lorenz, Dominik and Esser, Patrick and Ommer, Bj\"orn},
    title     = {High-Resolution Image Synthesis With Latent Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2022},
    pages     = {10684-10695}
}
```
## Examples
Using the [🤗's Diffusers library](https://github.com/huggingface/diffusers) to run Stable Diffusion 2 in a simple and efficient manner.
```bash
pip install diffusers transformers accelerate scipy safetensors
```
```python
import requests
from PIL import Image
from io import BytesIO
from diffusers import StableDiffusionUpscalePipeline
import torch
# load model and scheduler
model_id = "stabilityai/stable-diffusion-x4-upscaler"
pipeline = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipeline = pipeline.to("cuda")
# let's download an image
url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png"
response = requests.get(url)
low_res_img = Image.open(BytesIO(response.content)).convert("RGB")
low_res_img = low_res_img.resize((128, 128))
prompt = "a white cat"
upscaled_image = pipeline(prompt=prompt, image=low_res_img).images[0]
upscaled_image.save("upsampled_cat.png")
```
**Notes**:
- Despite not being a dependency, we highly recommend installing [xformers](https://github.com/facebookresearch/xformers) for memory-efficient attention (better performance).
- If you have low GPU RAM available, add `pipeline.enable_attention_slicing()` after sending the pipeline to `cuda` for less VRAM usage (at the cost of speed).
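Since the card highlights the `noise_level` input, here is a short sketch of setting it explicitly, continuing the example above (`20` is the pipeline's default; higher values add more noise to the low-resolution input):

```python
# Continuing from the example above: noise_level controls how much noise is
# added to the low-res input before upscaling (the pipeline's default is 20).
upscaled_image = pipeline(prompt=prompt, image=low_res_img, noise_level=20).images[0]
upscaled_image.save("upsampled_cat_nl20.png")
```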
# Uses
## Direct Use
The model is intended for research purposes only. Possible research areas and tasks include
- Safe deployment of models which have the potential to generate harmful content.
- Probing and understanding the limitations and biases of generative models.
- Generation of artworks and use in design and other artistic processes.
- Applications in educational or creative tools.
- Research on generative models.
Excluded uses are described below.
### Misuse, Malicious Use, and Out-of-Scope Use
_Note: This section is originally taken from the [DALLE-MINI model card](https://huggingface.co/dalle-mini/dalle-mini), was used for Stable Diffusion v1, but applies in the same way to Stable Diffusion v2_.
The model should not be used to intentionally create or disseminate images that create hostile or alienating environments for people. This includes generating images that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes.
#### Out-of-Scope Use
The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model.
#### Misuse and Malicious Use
Using the model to generate content that is cruel to individuals is a misuse of this model. This includes, but is not limited to:
- Generating demeaning, dehumanizing, or otherwise harmful representations of people or their environments, cultures, religions, etc.
- Intentionally promoting or propagating discriminatory content or harmful stereotypes.
- Impersonating individuals without their consent.
- Sexual content without consent of the people who might see it.
- Mis- and disinformation
- Representations of egregious violence and gore
- Sharing of copyrighted or licensed material in violation of its terms of use.
- Sharing content that is an alteration of copyrighted or licensed material in violation of its terms of use.
## Limitations and Bias
### Limitations
- The model does not achieve perfect photorealism
- The model cannot render legible text
- The model does not perform well on more difficult tasks which involve compositionality, such as rendering an image corresponding to “A red cube on top of a blue sphere”
- Faces and people in general may not be generated properly.
- The model was trained mainly with English captions and will not work as well in other languages.
- The autoencoding part of the model is lossy
- The model was trained on a subset of the large-scale dataset
[LAION-5B](https://laion.ai/blog/laion-5b/), which contains adult, violent and sexual content. To partially mitigate this, we have filtered the dataset using LAION's NSFW detector (see Training section).
### Bias
While the capabilities of image generation models are impressive, they can also reinforce or exacerbate social biases.
Stable Diffusion v2 was primarily trained on subsets of [LAION-2B(en)](https://laion.ai/blog/laion-5b/),
which consists of images that are limited to English descriptions.
Texts and images from communities and cultures that use other languages are likely to be insufficiently accounted for.
This affects the overall output of the model, as white and western cultures are often set as the default. Further, the
ability of the model to generate content with non-English prompts is significantly worse than with English-language prompts.
Stable Diffusion v2 mirrors and exacerbates biases to such a degree that viewer discretion must be advised irrespective of the input or its intent.
## Training
**Training Data**
The model developers used the following dataset for training the model:
- LAION-5B and subsets (details below). The training data is further filtered using LAION's NSFW detector, with a "p_unsafe" score of 0.1 (conservative). For more details, please refer to LAION-5B's [NeurIPS 2022](https://openreview.net/forum?id=M3Y74vmsMcY) paper and reviewer discussions on the topic.
**Training Procedure**
Stable Diffusion v2 is a latent diffusion model which combines an autoencoder with a diffusion model that is trained in the latent space of the autoencoder. During training,
- Images are encoded through an encoder, which turns images into latent representations. The autoencoder uses a relative downsampling factor of 8 and maps images of shape H x W x 3 to latents of shape H/f x W/f x 4
- Text prompts are encoded through the OpenCLIP-ViT/H text-encoder.
- The output of the text encoder is fed into the UNet backbone of the latent diffusion model via cross-attention.
- The loss is a reconstruction objective between the noise that was added to the latent and the prediction made by the UNet. We also use the so-called _v-objective_, see https://arxiv.org/abs/2202.00512.
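
For reference, the _v-objective_ in the last bullet trains the UNet to predict a "velocity" target rather than the raw noise. With diffusion schedule coefficients $\alpha_t$ and $\sigma_t$, the target for a clean latent $x$ and added noise $\epsilon$ is

$$v \equiv \alpha_t \, \epsilon - \sigma_t \, x,$$

and the loss is the mean squared error between the UNet output and $v$ (see the linked paper for the derivation).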
We currently provide the following checkpoints:
- `512-base-ema.ckpt`: 550k steps at resolution `256x256` on a subset of [LAION-5B](https://laion.ai/blog/laion-5b/) filtered for explicit pornographic material, using the [LAION-NSFW classifier](https://github.com/LAION-AI/CLIP-based-NSFW-Detector) with `punsafe=0.1` and an [aesthetic score](https://github.com/christophschuhmann/improved-aesthetic-predictor) >= `4.5`.
850k steps at resolution `512x512` on the same dataset with resolution `>= 512x512`.
- `768-v-ema.ckpt`: Resumed from `512-base-ema.ckpt` and trained for 150k steps using a [v-objective](https://arxiv.org/abs/2202.00512) on the same dataset. Resumed for another 140k steps on a `768x768` subset of our dataset.
- `512-depth-ema.ckpt`: Resumed from `512-base-ema.ckpt` and finetuned for 200k steps. Added an extra input channel to process the (relative) depth prediction produced by [MiDaS](https://github.com/isl-org/MiDaS) (`dpt_hybrid`) which is used as an additional conditioning.
The additional input channels of the U-Net which process this extra information were zero-initialized.
- `512-inpainting-ema.ckpt`: Resumed from `512-base-ema.ckpt` and trained for another 200k steps. Follows the mask-generation strategy presented in [LAMA](https://github.com/saic-mdal/lama) which, in combination with the latent VAE representations of the masked image, are used as an additional conditioning.
The additional input channels of the U-Net which process this extra information were zero-initialized. The same strategy was used to train the [1.5-inpainting checkpoint](https://github.com/saic-mdal/lama).
- `x4-upscaling-ema.ckpt`: Trained for 1.25M steps on a 10M subset of LAION containing images `>2048x2048`. The model was trained on crops of size `512x512` and is a text-guided [latent upscaling diffusion model](https://arxiv.org/abs/2112.10752).
In addition to the textual input, it receives a `noise_level` as an input parameter, which can be used to add noise to the low-resolution input according to a [predefined diffusion schedule](configs/stable-diffusion/x4-upscaling.yaml).
- **Hardware:** 32 x 8 x A100 GPUs
- **Optimizer:** AdamW
- **Gradient Accumulations:** 1
- **Batch:** 32 x 8 x 2 x 4 = 2048
- **Learning rate:** warmup to 0.0001 for 10,000 steps and then kept constant
## Evaluation Results
Evaluations with different classifier-free guidance scales (1.5, 2.0, 3.0, 4.0,
5.0, 6.0, 7.0, 8.0) and 50 DDIM sampling steps show the relative improvements of the checkpoints:
![pareto](model-variants.jpg)
Evaluated using 50 DDIM steps and 10000 random prompts from the COCO2017 validation set, evaluated at 512x512 resolution. Not optimized for FID scores.
## Environmental Impact
**Stable Diffusion v1** **Estimated Emissions**
Based on that information, we estimate the following CO2 emissions using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). The hardware, runtime, cloud provider, and compute region were utilized to estimate the carbon impact.
- **Hardware Type:** A100 PCIe 40GB
- **Hours used:** 200000
- **Cloud Provider:** AWS
- **Compute Region:** US-east
- **Carbon Emitted (Power consumption x Time x Carbon produced based on location of power grid):** 15000 kg CO2 eq.
## Citation
```bibtex
@InProceedings{Rombach_2022_CVPR,
    author    = {Rombach, Robin and Blattmann, Andreas and Lorenz, Dominik and Esser, Patrick and Ommer, Bj\"orn},
    title     = {High-Resolution Image Synthesis With Latent Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2022},
    pages     = {10684-10695}
}
```
*This model card was written by: Robin Rombach, Patrick Esser and David Ha and is based on the [Stable Diffusion v1](https://github.com/CompVis/stable-diffusion/blob/main/Stable_Diffusion_v1_Model_Card.md) and [DALL-E Mini model card](https://huggingface.co/dalle-mini/dalle-mini).*
---
library_name: hunyuan3d-2.0
license: other
license_name: tencent-hunyuan-community
license_link: https://huggingface.co/tencent/Hunyuan3D-2/blob/main/LICENSE.txt
language:
- en
- zh
tags:
- image-to-3d
- text-to-3d
pipeline_tag: image-to-3d
---
<p align="center">
<img src="./assets/images/teaser.jpg">
</p>
<div align="center">
<a href=https://3d.hunyuan.tencent.com target="_blank"><img src=https://img.shields.io/badge/Hunyuan3D-black.svg?logo=homepage height=22px></a>
<a href=https://huggingface.co/spaces/tencent/Hunyuan3D-2 target="_blank"><img src=https://img.shields.io/badge/%F0%9F%A4%97%20Demo-276cb4.svg height=22px></a>
<a href=https://huggingface.co/tencent/Hunyuan3D-2 target="_blank"><img src=https://img.shields.io/badge/%F0%9F%A4%97%20Models-d96902.svg height=22px></a>
<a href=https://3d-models.hunyuan.tencent.com/ target="_blank"><img src= https://img.shields.io/badge/Page-bb8a2e.svg?logo=github height=22px></a>
<a href=https://discord.gg/GuaWYwzKbX target="_blank"><img src= https://img.shields.io/badge/Discord-white.svg?logo=discord height=22px></a>
<a href=https://github.com/Tencent/Hunyuan3D-2/blob/main/assets/report/Tencent_Hunyuan3D_2_0.pdf target="_blank"><img src=https://img.shields.io/badge/Report-b5212f.svg?logo=arxiv height=22px></a>
</div>
[//]: # ( <a href=# target="_blank"><img src=https://img.shields.io/badge/Report-b5212f.svg?logo=arxiv height=22px></a>)
[//]: # ( <a href=# target="_blank"><img src= https://img.shields.io/badge/Colab-8f2628.svg?logo=googlecolab height=22px></a>)
[//]: # ( <a href="#"><img alt="PyPI - Downloads" src="https://img.shields.io/pypi/v/mulankit?logo=pypi" height=22px></a>)
<br>
<p align="center">
“ Living out everyone’s imagination on creating and manipulating 3D assets.”
</p>
This repository contains the models of the paper [Hunyuan3D 2.0: Scaling Diffusion Models for High Resolution Textured 3D Assets Generation](https://huggingface.co/papers/2501.12202).
For code and more details on how to use it, refer to the [Github repository](https://github.com/Tencent/Hunyuan3D-2).
## 🔥 News
- Jan 21, 2025: 💬 Release [Hunyuan3D 2.0](https://huggingface.co/spaces/tencent/Hunyuan3D-2). Please give it a try!
## **Abstract**
We present Hunyuan3D 2.0, an advanced large-scale 3D synthesis system for generating high-resolution textured 3D assets.
This system includes two foundation components: a large-scale shape generation model - Hunyuan3D-DiT, and a large-scale
texture synthesis model - Hunyuan3D-Paint.
The shape generative model, built on a scalable flow-based diffusion transformer, aims to create geometry that properly
aligns with a given condition image, laying a solid foundation for downstream applications.
The texture synthesis model, benefiting from strong geometric and diffusion priors, produces high-resolution and vibrant
texture maps for either generated or hand-crafted meshes.
Furthermore, we build Hunyuan3D-Studio - a versatile, user-friendly production platform that simplifies the re-creation
process of 3D assets. It allows both professional and amateur users to manipulate or even animate their meshes
efficiently.
We systematically evaluate our models, showing that Hunyuan3D 2.0 outperforms previous state-of-the-art models,
including both open-source and closed-source models, in geometry detail, condition alignment, and texture quality.
<p align="center">
<img src="assets/images/system.jpg">
</p>
## ☯️ **Hunyuan3D 2.0**
### Architecture
Hunyuan3D 2.0 features a two-stage generation pipeline, starting with the creation of a bare mesh, followed by the
synthesis of a texture map for that mesh. This strategy is effective for decoupling the difficulties of shape and
texture generation and also provides flexibility for texturing either generated or handcrafted meshes.
<p align="left">
<img src="assets/images/arch.jpg">
</p>
### Performance
We have evaluated Hunyuan3D 2.0 against other open-source as well as closed-source 3D generation methods.
The numerical results indicate that Hunyuan3D 2.0 surpasses all baselines in the quality of generated textured 3D assets
and in its condition-following ability.
| Model | CMMD(⬇) | FID_CLIP(⬇) | FID(⬇) | CLIP-score(⬆) |
|-------------------------|-----------|-------------|-------------|---------------|
| Top Open-source Model1 | 3.591 | 54.639 | 289.287 | 0.787 |
| Top Close-source Model1 | 3.600 | 55.866 | 305.922 | 0.779 |
| Top Close-source Model2 | 3.368 | 49.744 | 294.628 | 0.806 |
| Top Close-source Model3 | 3.218 | 51.574 | 295.691 | 0.799 |
| Hunyuan3D 2.0 | **3.193** | **49.165** | **282.429** | **0.809** |
Generation results of Hunyuan3D 2.0:
<p align="left">
<img src="assets/images/e2e-1.gif" height=300>
<img src="assets/images/e2e-2.gif" height=300>
</p>
### Pretrained Models
| Model | Date | Huggingface |
|----------------------|------------|--------------------------------------------------------|
| Hunyuan3D-DiT-v2-0 | 2025-01-21 | [Download](https://huggingface.co/tencent/Hunyuan3D-2) |
| Hunyuan3D-Paint-v2-0 | 2025-01-21 | [Download](https://huggingface.co/tencent/Hunyuan3D-2) |
| Hunyuan3D-Delight-v2-0 | 2025-01-21 | [Download](https://huggingface.co/tencent/Hunyuan3D-2/tree/main/hunyuan3d-delight-v2-0) |
## 🤗 Get Started with Hunyuan3D 2.0
You can follow the steps below to use Hunyuan3D 2.0, either via code or the Gradio app.
### Install Requirements
Please install Pytorch via the [official](https://pytorch.org/) site. Then install the other requirements via
```bash
pip install -r requirements.txt
# for texture
cd hy3dgen/texgen/custom_rasterizer
python3 setup.py install
cd ../../..
cd hy3dgen/texgen/differentiable_renderer
bash compile_mesh_painter.sh  # on Linux; on Windows, run: python3 setup.py install
```
### API Usage
We designed a diffusers-like API for our shape generation model, Hunyuan3D-DiT, and our texture synthesis
model, Hunyuan3D-Paint.
You can access **Hunyuan3D-DiT** via:
```python
from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained('tencent/Hunyuan3D-2')
mesh = pipeline(image='assets/demo.png')[0]
```
The output mesh is a [trimesh object](https://trimesh.org/trimesh.html), which you can save to a GLB/OBJ (or other
format) file.
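For example:

```python
# The pipeline returns a trimesh object; the file extension picks the format.
mesh.export('demo.glb')  # or 'demo.obj'
```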
For **Hunyuan3D-Paint**, do the following:
```python
from hy3dgen.texgen import Hunyuan3DPaintPipeline
from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
# let's generate a mesh first
pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained('tencent/Hunyuan3D-2')
mesh = pipeline(image='assets/demo.png')[0]
pipeline = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2')
mesh = pipeline(mesh, image='assets/demo.png')
```
Please visit [minimal_demo.py](minimal_demo.py) for more advanced usage, such as **text to 3D** and **texture generation
for handcrafted meshes**.
### Gradio App
You can also host a [Gradio](https://www.gradio.app/) app on your own computer via:
```bash
pip3 install gradio==3.39.0
python3 gradio_app.py
```
Don't forget to visit [Hunyuan3D](https://3d.hunyuan.tencent.com) for quick use if you don't want to host it yourself.
## 📑 Open-Source Plan
- [x] Inference Code
- [x] Model Checkpoints
- [x] Technical Report
- [ ] ComfyUI
- [ ] TensorRT Version
## 🔗 BibTeX
If you found this repository helpful, please cite our report:
```bibtex
@misc{hunyuan3d22025tencent,
title={Hunyuan3D 2.0: Scaling Diffusion Models for High Resolution Textured 3D Assets Generation},
author={Tencent Hunyuan3D Team},
year={2025},
eprint={2501.12202},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{yang2024tencent,
title={Tencent Hunyuan3D-1.0: A Unified Framework for Text-to-3D and Image-to-3D Generation},
author={Tencent Hunyuan3D Team},
year={2024},
eprint={2411.02293},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
## Community Resources
Thanks to the contributions of community members, here are some great extensions of Hunyuan3D 2.0:
- [ComfyUI-Hunyuan3DWrapper](https://github.com/kijai/ComfyUI-Hunyuan3DWrapper)
- [Hunyuan3D-2-for-windows](https://github.com/sdbds/Hunyuan3D-2-for-windows)
- [📦 A bundle for running on Windows | 整合包](https://github.com/YanWenKun/Comfy3D-WinPortable/releases/tag/r8-hunyuan3d2)
## Acknowledgements
We would like to thank the contributors to
the [DINOv2](https://github.com/facebookresearch/dinov2), [Stable Diffusion](https://github.com/Stability-AI/stablediffusion), [FLUX](https://github.com/black-forest-labs/flux), [diffusers](https://github.com/huggingface/diffusers)
and [HuggingFace](https://huggingface.co) repositories, for their open research and exploration.
## Star History
<a href="https://star-history.com/#Tencent/Hunyuan3D-2&Date">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=Tencent/Hunyuan3D-2&type=Date&theme=dark" />
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=Tencent/Hunyuan3D-2&type=Date" />
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=Tencent/Hunyuan3D-2&type=Date" />
</picture>
</a>