feat: 初始提交

63bde97a · chenpangpang · 9cf8c6f1 · 63bde97a · 63bde97a · 63bde97a
Commit 63bde97a authored Aug 05, 2024 by chenpangpang
20 changed files
--- a/.gitignore
+++ b/.gitignore
+.idea
--- a/Dockerfile
+++ b/Dockerfile
+# Build stage: clone the project repository and install its Python
+# dependencies into the conda site-packages directory.
+FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.2.0-python3.10-cuda12.1-ubuntu22.04 AS base
+ARG IMAGE=instantmesh
+ARG IMAGE_UPPER=InstantMesh
+ARG BRANCH=gpu
+# Use WORKDIR instead of `RUN cd ... &&` so the clone location is explicit.
+WORKDIR /root
+RUN git clone -b "$BRANCH" "http://developer.hpccube.com/codes/chenpangpang/$IMAGE.git"
+WORKDIR /root/$IMAGE/$IMAGE_UPPER
+# --no-cache-dir keeps pip's download cache out of the layer.
+# NOTE(review): these packages are unpinned; an unpinned xformers may pull in a
+# different torch than the one shipped in the base image - confirm and pin.
+RUN pip install --no-cache-dir Ninja xformers triton
+RUN pip install --no-cache-dir -r requirements.txt
+
+
+#########
+# Prod  #
+#########
+# Final image: start again from the same base image and copy in only the
+# files needed at runtime.
+FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.2.0-python3.10-cuda12.1-ubuntu22.04
+# ARG values do not carry across stages, so they are declared again here.
+ARG IMAGE=instantmesh
+ARG IMAGE_UPPER=InstantMesh
+# Place the frpc binary inside the installed gradio package and make it executable.
+# NOTE(review): presumably the tunnel client gradio uses for share links - confirm.
+COPY chenyh/$IMAGE/frpc_linux_amd64_v0.2 /opt/conda/lib/python3.10/site-packages/gradio/
+RUN chmod +x /opt/conda/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.2
+# Model directories taken from the build context (not fetched by this Dockerfile).
+COPY chenyh/$IMAGE/sudo-ai/zero123plus-v1.2 /root/$IMAGE_UPPER/sudo-ai/zero123plus-v1.2
+COPY chenyh/$IMAGE/TencentARC/InstantMesh /root/$IMAGE_UPPER/TencentARC/InstantMesh
+# Python packages installed in the `base` stage.
+COPY --from=base /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
+# Application source cloned in the `base` stage, copied into the directory that
+# already holds the model directories above.
+COPY --from=base /root/$IMAGE/$IMAGE_UPPER /root/$IMAGE_UPPER
+# Launcher notebook and start script from the root of the cloned repository.
+COPY --from=base /root/$IMAGE/启动器.ipynb /root/$IMAGE/start.sh /root/
--- a/InstantMesh/LICENSE
+++ b/InstantMesh/LICENSE
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/InstantMesh/README.md
+++ b/InstantMesh/README.md
+<div align="center">
+  
+# InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models
+
+<a href="https://arxiv.org/abs/2404.07191"><img src="https://img.shields.io/badge/ArXiv-2404.07191-brightgreen"></a> 
+<a href="https://huggingface.co/TencentARC/InstantMesh"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Model_Card-Huggingface-orange"></a> 
+<a href="https://huggingface.co/spaces/TencentARC/InstantMesh"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Gradio%20Demo-Huggingface-orange"></a> <br>
+<a href="https://replicate.com/camenduru/instantmesh"><img src="https://img.shields.io/badge/Demo-Replicate-blue"></a>
+<a href="https://colab.research.google.com/github/camenduru/InstantMesh-jupyter/blob/main/InstantMesh_jupyter.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg"></a>
+<a href="https://github.com/jtydhr88/ComfyUI-InstantMesh"><img src="https://img.shields.io/badge/Demo-ComfyUI-8A2BE2"></a>
+
+</div>
+
+---
+
+This repo is the official implementation of InstantMesh, a feed-forward framework for efficient 3D mesh generation from a single image based on the LRM/Instant3D architecture.
+
+https://github.com/TencentARC/InstantMesh/assets/20635237/dab3511e-e7c6-4c0b-bab7-15772045c47d
+
+# 🚩 Features and Todo List
+- [x] 🔥🔥 Release Zero123++ fine-tuning code. 
+- [x] 🔥🔥 Support for running gradio demo on two GPUs to save memory.
+- [x] 🔥🔥 Support for running demo with docker. Please refer to the [docker](docker/) directory.
+- [x] Release inference and training code.
+- [x] Release model weights.
+- [x] Release huggingface gradio demo. Please try it at [demo](https://huggingface.co/spaces/TencentARC/InstantMesh) link.
+- [ ] Add support for more multi-view diffusion models.
+
+# ⚙️ Dependencies and Installation
+
+We recommend using `Python>=3.10`, `PyTorch>=2.1.0`, and `CUDA>=12.1`.
+```bash
+conda create --name instantmesh python=3.10
+conda activate instantmesh
+pip install -U pip
+
+# Ensure Ninja is installed
+conda install Ninja
+
+# Install the correct version of CUDA
+conda install cuda -c nvidia/label/cuda-12.1.0
+
+# Install PyTorch and xformers
+# You may need to install another xformers version if you use a different PyTorch version
+pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
+pip install xformers==0.0.22.post7
+
+# For Linux users: Install Triton 
+pip install triton
+
+# For Windows users: Use the prebuilt version of Triton provided here:
+pip install https://huggingface.co/r4ziel/xformers_pre_built/resolve/main/triton-2.0.0-cp310-cp310-win_amd64.whl
+
+# Install other requirements
+pip install -r requirements.txt
+```
+
+# 💫 How to Use
+
+## Download the models
+
+We provide 4 sparse-view reconstruction model variants and a customized Zero123++ UNet for white-background image generation in the [model card](https://huggingface.co/TencentARC/InstantMesh).
+
+Our inference script will download the models automatically. Alternatively, you can manually download the models and put them under the `ckpts/` directory.
+
+By default, we use the `instant-mesh-large` reconstruction model variant.
+
+## Start a local gradio demo
+
+To start a gradio demo on your local machine, simply run:
+```bash
+python app.py
+```
+
+If you have multiple GPUs in your machine, the demo app will run on two GPUs automatically to save memory. You can also force it to run on a single GPU:
+```bash
+CUDA_VISIBLE_DEVICES=0 python app.py
+```
+
+Alternatively, you can run the demo with docker. Please follow the instructions in the [docker](docker/) directory.
+
+## Running with command line
+
+To generate 3D meshes from images via command line, simply run:
+```bash
+python run.py configs/instant-mesh-large.yaml examples/hatsune_miku.png --save_video
+```
+
+We use [rembg](https://github.com/danielgatis/rembg) to segment the foreground object. If the input image already has an alpha mask, please specify the `--no_rembg` flag:
+```bash
+python run.py configs/instant-mesh-large.yaml examples/hatsune_miku.png --save_video --no_rembg
+```
+
+By default, our script exports a `.obj` mesh with vertex colors. Please specify the `--export_texmap` flag if you want to export a mesh with a texture map instead (this will take longer):
+```bash
+python run.py configs/instant-mesh-large.yaml examples/hatsune_miku.png --save_video --export_texmap
+```
+
+Please use a different `.yaml` config file in the [configs](./configs) directory if you hope to use other reconstruction model variants. For example, using the `instant-nerf-large` model for generation:
+```bash
+python run.py configs/instant-nerf-large.yaml examples/hatsune_miku.png --save_video
+```
+**Note:** When using the `NeRF` model variants for image-to-3D generation, exporting a mesh with a texture map by specifying `--export_texmap` may take a long time in the UV unwrapping step, since the default iso-surface extraction resolution is `256`. You can set a lower iso-surface extraction resolution in the config file.
+
+# 💻 Training
+
+We provide our training code to facilitate future research. But we cannot provide the training dataset due to its size. Please refer to our [dataloader](src/data/objaverse.py) for more details.
+
+To train the sparse-view reconstruction models, please run:
+```bash
+# Training on NeRF representation
+python train.py --base configs/instant-nerf-large-train.yaml --gpus 0,1,2,3,4,5,6,7 --num_nodes 1
+
+# Training on Mesh representation
+python train.py --base configs/instant-mesh-large-train.yaml --gpus 0,1,2,3,4,5,6,7 --num_nodes 1
+```
+
+We also provide our Zero123++ fine-tuning code since it is frequently requested. The running command is:
+```bash
+python train.py --base configs/zero123plus-finetune.yaml --gpus 0,1,2,3,4,5,6,7 --num_nodes 1
+```
+
+# :books: Citation
+
+If you find our work useful for your research or applications, please cite using this BibTeX:
+
+```BibTeX
+@article{xu2024instantmesh,
+  title={InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models},
+  author={Xu, Jiale and Cheng, Weihao and Gao, Yiming and Wang, Xintao and Gao, Shenghua and Shan, Ying},
+  journal={arXiv preprint arXiv:2404.07191},
+  year={2024}
+}
+```
+
+# 🤗 Acknowledgements
+
+We thank the authors of the following projects for their excellent contributions to 3D generative AI!
+
+- [Zero123++](https://github.com/SUDO-AI-3D/zero123plus)
+- [OpenLRM](https://github.com/3DTopia/OpenLRM)
+- [FlexiCubes](https://github.com/nv-tlabs/FlexiCubes)
+- [Instant3D](https://instant-3d.github.io/)
+
+Thank [@camenduru](https://github.com/camenduru) for implementing [Replicate Demo](https://replicate.com/camenduru/instantmesh) and [Colab Demo](https://colab.research.google.com/github/camenduru/InstantMesh-jupyter/blob/main/InstantMesh_jupyter.ipynb)!  
+Thank [@jtydhr88](https://github.com/jtydhr88) for implementing [ComfyUI support](https://github.com/jtydhr88/ComfyUI-InstantMesh)!
--- a/InstantMesh/app.py
+++ b/InstantMesh/app.py
+import os
+import imageio
+import numpy as np
+import torch
+import rembg
+from PIL import Image
+from torchvision.transforms import v2
+from pytorch_lightning import seed_everything
+from omegaconf import OmegaConf
+from einops import rearrange, repeat
+from tqdm import tqdm
+from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
+
+from src.utils.train_util import instantiate_from_config
+from src.utils.camera_util import (
+    FOV_to_intrinsics,
+    get_zero123plus_input_cameras,
+    get_circular_camera_poses,
+)
+from src.utils.mesh_util import save_obj, save_glb
+from src.utils.infer_util import remove_background, resize_foreground, images_to_video
+
+import tempfile
+
+if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
+    device0 = torch.device('cuda:0')
+    device1 = torch.device('cuda:1')
+else:
+    device0 = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    device1 = device0
+
+
+def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
+    """
+    Get the rendering camera parameters.
+    """
+    c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
+    if is_flexicubes:
+        cameras = torch.linalg.inv(c2ws)
+        cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
+    else:
+        extrinsics = c2ws.flatten(-2)
+        intrinsics = FOV_to_intrinsics(30.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
+        cameras = torch.cat([extrinsics, intrinsics], dim=-1)
+        cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
+    return cameras
+
+
+def images_to_video(images, output_path, fps=30):
+    # images: (N, C, H, W)
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    frames = []
+    for i in range(images.shape[0]):
+        frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8).clip(0, 255)
+        assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
+            f"Frame shape mismatch: {frame.shape} vs {images.shape}"
+        assert frame.min() >= 0 and frame.max() <= 255, \
+            f"Frame value out of range: {frame.min()} ~ {frame.max()}"
+        frames.append(frame)
+    imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264')
+
+
+###############################################################################
+# Configuration.
+###############################################################################
+
+# Seed the global RNGs once at import time; generate_mvs seeds again per request.
+seed_everything(0)
+
+# The config file name (without '.yaml') also decides IS_FLEXICUBES below.
+config_path = 'configs/instant-mesh-large.yaml'
+config = OmegaConf.load(config_path)
+config_name = os.path.basename(config_path).replace('.yaml', '')
+model_config = config.model_config
+infer_config = config.infer_config
+
+# Config names starting with 'instant-mesh' take the IS_FLEXICUBES code paths.
+IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
+
+# NOTE(review): `device` is not referenced in the visible part of this file;
+# device0 / device1 are used instead - confirm before removing.
+device = torch.device('cuda')
+
+# load diffusion model
+print('Loading diffusion model ...')
+pipeline = DiffusionPipeline.from_pretrained(
+    "sudo-ai/zero123plus-v1.2",
+    custom_pipeline="zero123plus",
+    torch_dtype=torch.float16
+)
+pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
+    pipeline.scheduler.config, timestep_spacing='trailing'
+)
+
+# load custom white-background UNet
+# The checkpoint is given as a relative path and read with torch.load.
+unet_ckpt_path = "TencentARC/InstantMesh/diffusion_pytorch_model.bin"
+state_dict = torch.load(unet_ckpt_path, map_location='cpu')
+pipeline.unet.load_state_dict(state_dict, strict=True)
+
+pipeline = pipeline.to(device0)
+
+# load reconstruction model
+print('Loading reconstruction model ...')
+model_ckpt_path = "TencentARC/InstantMesh/instant_mesh_large.ckpt"
+model = instantiate_from_config(model_config)
+state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
+# Keep only the 'lrm_generator.' weights, strip that 14-character prefix, and
+# drop every key containing 'source_camera'.
+state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
+model.load_state_dict(state_dict, strict=True)
+
+# The reconstruction model is placed on device1; the diffusion pipeline is on device0.
+model = model.to(device1)
+if IS_FLEXICUBES:
+    model.init_flexicubes_geometry(device1, fovy=30.0)
+model = model.eval()
+
+print('Loading Finished!')
+
+
+def check_input_image(input_image):
+    if input_image is None:
+        raise gr.Error("No image uploaded!")
+
+
+def preprocess(input_image, do_remove_background):
+    rembg_session = rembg.new_session() if do_remove_background else None
+    if do_remove_background:
+        input_image = remove_background(input_image, rembg_session)
+        input_image = resize_foreground(input_image, 0.85)
+
+    return input_image
+
+
+def generate_mvs(input_image, sample_steps, sample_seed):
+    seed_everything(sample_seed)
+
+    # sampling
+    generator = torch.Generator(device=device0)
+    z123_image = pipeline(
+        input_image,
+        num_inference_steps=sample_steps,
+        generator=generator,
+    ).images[0]
+
+    show_image = np.asarray(z123_image, dtype=np.uint8)
+    show_image = torch.from_numpy(show_image)  # (960, 640, 3)
+    show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
+    show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
+    show_image = Image.fromarray(show_image.numpy())
+
+    return z123_image, show_image
+
+
+def make_mesh(mesh_fpath, planes):
+    mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
+    mesh_dirname = os.path.dirname(mesh_fpath)
+    mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")
+
+    with torch.no_grad():
+        # get mesh
+
+        mesh_out = model.extract_mesh(
+            planes,
+            use_texture_map=False,
+            **infer_config,
+        )
+
+        vertices, faces, vertex_colors = mesh_out
+        vertices = vertices[:, [1, 2, 0]]
+
+        save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
+        save_obj(vertices, faces, vertex_colors, mesh_fpath)
+
+        print(f"Mesh saved to {mesh_fpath}")
+
+    return mesh_fpath, mesh_glb_fpath
+
+
+def make3d(images):
+    """
+    Reconstruct a 3D result from the multi-view image and export it.
+
+    images is converted to a float array scaled to [0, 1], split into six
+    views (3 rows x 2 columns), and fed to the reconstruction model on
+    device1. A turntable video is rendered and written next to a temporary
+    .obj path, then make_mesh writes the OBJ and GLB meshes.
+
+    Returns (video_fpath, mesh_fpath, mesh_glb_fpath).
+    """
+    images = np.asarray(images, dtype=np.float32) / 255.0
+    images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()  # (3, 960, 640)
+    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)  # (6, 3, 320, 320)
+
+    input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device1)
+    render_cameras = get_render_cameras(
+        batch_size=1, radius=4.5, elevation=20.0, is_flexicubes=IS_FLEXICUBES).to(device1)
+
+    images = images.unsqueeze(0).to(device1)
+    # NOTE(review): interpolation=3 is presumably bicubic - confirm against torchvision.
+    images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
+
+    # Only the generated name is used; delete=False keeps the file on disk.
+    # The temporary file object itself is never closed explicitly here.
+    mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
+    print(mesh_fpath)
+    mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
+    mesh_dirname = os.path.dirname(mesh_fpath)
+    video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4")
+
+    with torch.no_grad():
+        # get triplane
+        planes = model.forward_planes(images, input_cameras)
+
+        # get video
+        # Render the cameras in chunks: 20 per call on the FlexiCubes path, 1 otherwise.
+        chunk_size = 20 if IS_FLEXICUBES else 1
+        render_size = 384
+
+        frames = []
+        for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
+            if IS_FLEXICUBES:
+                frame = model.forward_geometry(
+                    planes,
+                    render_cameras[:, i:i + chunk_size],
+                    render_size=render_size,
+                )['img']
+            else:
+                frame = model.synthesizer(
+                    planes,
+                    cameras=render_cameras[:, i:i + chunk_size],
+                    render_size=render_size,
+                )['images_rgb']
+            frames.append(frame)
+        # Join the per-chunk outputs along dim 1.
+        frames = torch.cat(frames, dim=1)
+
+        # This resolves to the images_to_video defined in this file, which
+        # shadows the one imported from src.utils.infer_util.
+        images_to_video(
+            frames[0],
+            video_fpath,
+            fps=30,
+        )
+
+        print(f"Video saved to {video_fpath}")
+
+    mesh_fpath, mesh_glb_fpath = make_mesh(mesh_fpath, planes)
+
+    return video_fpath, mesh_fpath, mesh_glb_fpath
+
+
+import gradio as gr
+
+_HEADER_ = '''
+<h2><b>Official 🤗 Gradio Demo</b></h2><h2><a href='https://github.com/TencentARC/InstantMesh' target='_blank'><b>InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models</b></a></h2>
+
+**InstantMesh** is a feed-forward framework for efficient 3D mesh generation from a single image based on the LRM/Instant3D architecture.
+
+Code: <a href='https://github.com/TencentARC/InstantMesh' target='_blank'>GitHub</a>. Techenical report: <a href='https://arxiv.org/abs/2404.07191' target='_blank'>ArXiv</a>.
+
+❗️❗️❗️**Important Notes:**
+- Our demo can export a .obj mesh with vertex colors or a .glb mesh now. If you prefer to export a .obj mesh with a **texture map**, please refer to our <a href='https://github.com/TencentARC/InstantMesh?tab=readme-ov-file#running-with-command-line' target='_blank'>Github Repo</a>.
+- The 3D mesh generation results highly depend on the quality of generated multi-view images. Please try a different **seed value** if the result is unsatisfying (Default: 42).
+'''
+
+# Markdown footer for the demo page: star request, BibTeX citation, license and contact.
+_CITE_ = r"""
+If InstantMesh is helpful, please help to ⭐ the <a href='https://github.com/TencentARC/InstantMesh' target='_blank'>Github Repo</a>. Thanks! [![GitHub Stars](https://img.shields.io/github/stars/TencentARC/InstantMesh?style=social)](https://github.com/TencentARC/InstantMesh)
+---
+📝 **Citation**
+
+If you find our work useful for your research or applications, please cite using this bibtex:
+```bibtex
+@article{xu2024instantmesh,
+  title={InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models},
+  author={Xu, Jiale and Cheng, Weihao and Gao, Yiming and Wang, Xintao and Gao, Shenghua and Shan, Ying},
+  journal={arXiv preprint arXiv:2404.07191},
+  year={2024}
+}
+```
+
+📋 **License**
+
+Apache-2.0 LICENSE. Please refer to the [LICENSE file](https://huggingface.co/spaces/TencentARC/InstantMesh/blob/main/LICENSE) for details.
+
+📧 **Contact**
+
+If you have any questions, feel free to open a discussion or contact us at <b>bluestyle928@gmail.com</b>.
+"""
+
+# Build the Gradio UI: header text, a left column with the input image and
+# sampling controls, a right column with the generated outputs, the citation
+# text, and finally the click pipeline that ties the widgets together.
+with gr.Blocks() as demo:
+    gr.Markdown(_HEADER_)
+    with gr.Row(variant="panel"):
+        # ---- Left column: input image, options, trigger button, examples ----
+        with gr.Column():
+            with gr.Row():
+                # User-uploaded image; handed to the callbacks as an RGBA PIL image.
+                input_image = gr.Image(
+                    label="Input Image",
+                    image_mode="RGBA",
+                    sources="upload",
+                    width=256,
+                    height=256,
+                    type="pil",
+                    elem_id="content_image",
+                )
+                # Read-only preview that receives the output of `preprocess`.
+                processed_image = gr.Image(
+                    label="Processed Image",
+                    image_mode="RGBA",
+                    width=256,
+                    height=256,
+                    type="pil",
+                    interactive=False
+                )
+            with gr.Row():
+                with gr.Group():
+                    # Passed to `preprocess` together with the input image.
+                    do_remove_background = gr.Checkbox(
+                        label="Remove Background", value=True
+                    )
+                    # Seed passed to `generate_mvs`; defaults to 42.
+                    sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
+
+                    # Step count passed to `generate_mvs`: 30..75 in increments of 5,
+                    # starting at the maximum (75).
+                    sample_steps = gr.Slider(
+                        label="Sample Steps",
+                        minimum=30,
+                        maximum=75,
+                        value=75,
+                        step=5
+                    )
+
+            with gr.Row():
+                submit = gr.Button("Generate", elem_id="generate", variant="primary")
+
+            with gr.Row(variant="panel"):
+                # Every file in ./examples (sorted by name) becomes a clickable example
+                # that fills `input_image`. The path is relative, so the directory is
+                # looked up in the current working directory when this module runs.
+                gr.Examples(
+                    examples=[
+                        os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
+                    ],
+                    inputs=[input_image],
+                    label="Examples",
+                    examples_per_page=20
+                )
+
+        # ---- Right column: multi-view preview, video, and 3D model viewers ----
+        with gr.Column():
+            with gr.Row():
+                with gr.Column():
+                    # Second output of `generate_mvs`.
+                    mv_show_images = gr.Image(
+                        label="Generated Multi-views",
+                        type="pil",
+                        width=379,
+                        interactive=False
+                    )
+
+                with gr.Column():
+                    # First output of `make3d`.
+                    output_video = gr.Video(
+                        label="video", format="mp4",
+                        width=379,
+                        autoplay=True,
+                        interactive=False
+                    )
+
+            with gr.Row():
+                # The same `make3d` call fills both tabs: OBJ (second output) and
+                # GLB (third output).
+                with gr.Tab("OBJ"):
+                    output_model_obj = gr.Model3D(
+                        label="Output Model (OBJ Format)",
+                        # width=768,
+                        interactive=False,
+                    )
+                    gr.Markdown(
+                        "Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
+                with gr.Tab("GLB"):
+                    output_model_glb = gr.Model3D(
+                        label="Output Model (GLB Format)",
+                        # width=768,
+                        interactive=False,
+                    )
+                    gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
+
+            with gr.Row():
+                gr.Markdown('''Try a different <b>seed value</b> if the result is unsatisfying (Default: 42).''')
+
+    gr.Markdown(_CITE_)
+    # Invisible state slot: carries the first output of `generate_mvs` into `make3d`.
+    mv_images = gr.State()
+
+    # Click pipeline, chained with `.success()` so each stage is only started
+    # after the previous one finished without raising:
+    #   1. check_input_image(input_image)                    -> no outputs
+    #   2. preprocess(input_image, do_remove_background)     -> processed_image
+    #   3. generate_mvs(processed_image, steps, seed)        -> mv_images, mv_show_images
+    #   4. make3d(mv_images)                                 -> video, OBJ model, GLB model
+    submit.click(fn=check_input_image, inputs=[input_image]).success(
+        fn=preprocess,
+        inputs=[input_image, do_remove_background],
+        outputs=[processed_image],
+    ).success(
+        fn=generate_mvs,
+        inputs=[processed_image, sample_steps, sample_seed],
+        outputs=[mv_images, mv_show_images],
+    ).success(
+        fn=make3d,
+        inputs=[mv_images],
+        outputs=[output_video, output_model_obj, output_model_glb]
+    )
+
+demo.queue(max_size=10)
+demo.launch(server_name="0.0.0.0", share=True)
--- a/InstantMesh/assets/teaser.mp4
+++ b/InstantMesh/assets/teaser.mp4
--- a/InstantMesh/configs/instant-mesh-base.yaml
+++ b/InstantMesh/configs/instant-mesh-base.yaml
+# Inference configuration for the "base" InstantMesh model
+# (12 transformer layers, triplane_dim 40, 96 samples per ray).
+model_config:
+  # Dotted path of the model class to build.
+  # `params` are presumably passed to it as keyword arguments — TODO confirm against the config loader.
+  target: src.models.lrm_mesh.InstantMesh
+  params:
+    encoder_feat_dim: 768
+    encoder_freeze: false
+    encoder_model_name: facebook/dino-vitb16
+    transformer_dim: 1024
+    transformer_layers: 12
+    transformer_heads: 16
+    triplane_low_res: 32
+    triplane_high_res: 64
+    triplane_dim: 40
+    rendering_samples_per_ray: 96
+    grid_res: 128
+    grid_scale: 2.1
+
+
+# Checkpoint locations and output resolutions used at inference time.
+# Both paths are relative; presumably resolved against the directory the app is started from — verify.
+infer_config:
+  unet_path: ckpts/diffusion_pytorch_model.bin
+  model_path: ckpts/instant_mesh_base.ckpt
+  texture_resolution: 1024
+  render_resolution: 512
\ No newline at end of file
--- a/InstantMesh/configs/instant-mesh-large-train.yaml
+++ b/InstantMesh/configs/instant-mesh-large-train.yaml
+# Training configuration for the "large" InstantMesh model: the training wrapper
+# (src.model_mesh.MVRecon) and the generator network it contains.
+model:
+  base_learning_rate: 4.0e-05
+  target: src.model_mesh.MVRecon
+  params:
+    # Checkpoint to start from. The path matches the `last.ckpt` location of a run named
+    # "instant-nerf-large-train" — presumably that training stage has to be run first; confirm.
+    init_ckpt: logs/instant-nerf-large-train/checkpoints/last.ckpt
+    input_size: 320
+    render_size: 512
+    
+    # Generator architecture (16 transformer layers, triplane_dim 80, 128 samples per ray).
+    lrm_generator_config:
+      target: src.models.lrm_mesh.InstantMesh
+      params:
+        encoder_feat_dim: 768
+        encoder_freeze: false
+        encoder_model_name: facebook/dino-vitb16
+        transformer_dim: 1024
+        transformer_layers: 16
+        transformer_heads: 16
+        triplane_low_res: 32
+        triplane_high_res: 64
+        triplane_dim: 80
+        rendering_samples_per_ray: 128
+        grid_res: 128
+        grid_scale: 2.1
+
+
+# Data module: training samples come from data/objaverse, validation samples
+# from data/valid_samples. All paths are relative.
+data:
+  target: src.data.objaverse.DataModuleFromConfig
+  params:
+    batch_size: 2
+    num_workers: 8
+    train:
+      target: src.data.objaverse.ObjaverseData
+      params:
+        root_dir: data/objaverse
+        meta_fname: filtered_obj_name.json
+        # Input and target views are read from the same rendering directory.
+        input_image_dir: rendering_random_32views
+        target_image_dir: rendering_random_32views
+        input_view_num: 6
+        target_view_num: 4
+        # Presumably the number of rendered views available per object
+        # (the directory name also says 32 views) — confirm.
+        total_view_n: 32
+        fov: 50
+        camera_rotation: true
+        validation: false
+    validation:
+      target: src.data.objaverse.ValidationData
+      params:
+        root_dir: data/valid_samples
+        input_view_num: 6
+        input_image_size: 320
+        # NOTE(review): validation uses fov 30 while training uses fov 50 — confirm this is intended.
+        fov: 30
+
+
+# Checkpointing and trainer options.
+# Presumably forwarded to PyTorch Lightning's ModelCheckpoint and Trainer — confirm against the training script.
+lightning:
+  modelcheckpoint:
+    params:
+      # Write a checkpoint every 2000 training steps and also keep a "last" checkpoint.
+      every_n_train_steps: 2000
+      save_top_k: -1
+      save_last: true
+  # No additional callbacks are configured.
+  callbacks: {}
+
+  trainer:
+    benchmark: true
+    max_epochs: -1
+    val_check_interval: 1000
+    num_sanity_val_steps: 0
+    accumulate_grad_batches: 1
+    check_val_every_n_epoch: null   # if this is not set, validation does not run
--- a/InstantMesh/configs/instant-mesh-large.yaml
+++ b/InstantMesh/configs/instant-mesh-large.yaml
+# Inference configuration for the "large" InstantMesh model
+# (16 transformer layers, triplane_dim 80, 128 samples per ray).
+model_config:
+  # Dotted path of the model class to build.
+  # `params` are presumably passed to it as keyword arguments — TODO confirm against the config loader.
+  target: src.models.lrm_mesh.InstantMesh
+  params:
+    encoder_feat_dim: 768
+    encoder_freeze: false
+    encoder_model_name: facebook/dino-vitb16
+    transformer_dim: 1024
+    transformer_layers: 16
+    transformer_heads: 16
+    triplane_low_res: 32
+    triplane_high_res: 64
+    triplane_dim: 80
+    rendering_samples_per_ray: 128
+    grid_res: 128
+    grid_scale: 2.1
+
+
+# Checkpoint locations and output resolutions used at inference time.
+# Both paths are relative; presumably resolved against the directory the app is started from — verify.
+infer_config:
+  unet_path: ckpts/diffusion_pytorch_model.bin
+  model_path: ckpts/instant_mesh_large.ckpt
+  texture_resolution: 1024
+  render_resolution: 512
\ No newline at end of file
--- a/InstantMesh/configs/instant-nerf-base.yaml
+++ b/InstantMesh/configs/instant-nerf-base.yaml
+# Inference configuration for the "base" InstantNeRF model
+# (12 transformer layers, triplane_dim 40, 96 samples per ray).
+model_config:
+  # Dotted path of the model class to build.
+  # `params` are presumably passed to it as keyword arguments — TODO confirm against the config loader.
+  target: src.models.lrm.InstantNeRF
+  params:
+    encoder_feat_dim: 768
+    encoder_freeze: false
+    encoder_model_name: facebook/dino-vitb16
+    transformer_dim: 1024
+    transformer_layers: 12
+    transformer_heads: 16
+    triplane_low_res: 32
+    triplane_high_res: 64
+    triplane_dim: 40
+    rendering_samples_per_ray: 96
+
+
+# Checkpoint locations plus mesh-extraction and rendering settings used at inference time.
+# Both paths are relative; presumably resolved against the directory the app is started from — verify.
+infer_config:
+  unet_path: ckpts/diffusion_pytorch_model.bin
+  model_path: ckpts/instant_nerf_base.ckpt
+  mesh_threshold: 10.0
+  mesh_resolution: 256
+  render_resolution: 384
\ No newline at end of file
--- a/InstantMesh/configs/instant-nerf-large-train.yaml
+++ b/InstantMesh/configs/instant-nerf-large-train.yaml
+# Training configuration for the "large" InstantNeRF model: the training wrapper
+# (src.model.MVRecon) and the generator network it contains.
+model:
+  base_learning_rate: 4.0e-04
+  target: src.model.MVRecon
+  params:
+    input_size: 320
+    render_size: 192
+    
+    # Generator architecture (16 transformer layers, triplane_dim 80, 128 samples per ray).
+    lrm_generator_config:
+      target: src.models.lrm.InstantNeRF
+      params:
+        encoder_feat_dim: 768
+        encoder_freeze: false
+        encoder_model_name: facebook/dino-vitb16
+        transformer_dim: 1024
+        transformer_layers: 16
+        transformer_heads: 16
+        triplane_low_res: 32
+        triplane_high_res: 64
+        triplane_dim: 80
+        rendering_samples_per_ray: 128
+
+
+# Data module: training samples come from data/objaverse, validation samples
+# from data/valid_samples. All paths are relative.
+data:
+  target: src.data.objaverse.DataModuleFromConfig
+  params:
+    batch_size: 2
+    num_workers: 8
+    train:
+      target: src.data.objaverse.ObjaverseData
+      params:
+        root_dir: data/objaverse
+        meta_fname: filtered_obj_name.json
+        # Input and target views are read from the same rendering directory.
+        input_image_dir: rendering_random_32views
+        target_image_dir: rendering_random_32views
+        input_view_num: 6
+        target_view_num: 4
+        # Presumably the number of rendered views available per object
+        # (the directory name also says 32 views) — confirm.
+        total_view_n: 32
+        fov: 50
+        camera_rotation: true
+        validation: false
+    validation:
+      target: src.data.objaverse.ValidationData
+      params:
+        root_dir: data/valid_samples
+        input_view_num: 6
+        input_image_size: 320
+        # NOTE(review): validation uses fov 30 while training uses fov 50 — confirm this is intended.
+        fov: 30
+
+
+# Checkpointing and trainer options.
+# Presumably forwarded to PyTorch Lightning's ModelCheckpoint and Trainer — confirm against the training script.
+lightning:
+  modelcheckpoint:
+    params:
+      # Write a checkpoint every 1000 training steps and also keep a "last" checkpoint.
+      every_n_train_steps: 1000
+      save_top_k: -1
+      save_last: true
+  # No additional callbacks are configured.
+  callbacks: {}
+
+  trainer:
+    benchmark: true
+    max_epochs: -1
+    gradient_clip_val: 1.0
+    val_check_interval: 1000
+    num_sanity_val_steps: 0
+    accumulate_grad_batches: 1
+    check_val_every_n_epoch: null   # if this is not set, validation does not run
--- a/InstantMesh/configs/instant-nerf-large.yaml
+++ b/InstantMesh/configs/instant-nerf-large.yaml
+# Inference configuration for the "large" InstantNeRF model
+# (16 transformer layers, triplane_dim 80, 128 samples per ray).
+model_config:
+  # Dotted path of the model class to build.
+  # `params` are presumably passed to it as keyword arguments — TODO confirm against the config loader.
+  target: src.models.lrm.InstantNeRF
+  params:
+    encoder_feat_dim: 768
+    encoder_freeze: false
+    encoder_model_name: facebook/dino-vitb16
+    transformer_dim: 1024
+    transformer_layers: 16
+    transformer_heads: 16
+    triplane_low_res: 32
+    triplane_high_res: 64
+    triplane_dim: 80
+    rendering_samples_per_ray: 128
+
+
+# Checkpoint locations plus mesh-extraction and rendering settings used at inference time.
+# Both paths are relative; presumably resolved against the directory the app is started from — verify.
+infer_config:
+  unet_path: ckpts/diffusion_pytorch_model.bin
+  model_path: ckpts/instant_nerf_large.ckpt
+  mesh_threshold: 10.0
+  mesh_resolution: 256
+  render_resolution: 384
\ No newline at end of file
--- a/InstantMesh/configs/zero123plus-finetune.yaml
+++ b/InstantMesh/configs/zero123plus-finetune.yaml
+# Fine-tuning configuration for the Zero123++ multi-view diffusion model.
+model:
+  base_learning_rate: 1.0e-05
+  target: zero123plus.model.MVDiffusion
+  params:
+    drop_cond_prob: 0.1
+
+    # Pretrained weights to start from, and the pipeline implementation to load them with.
+    # `custom_pipeline` is a relative path, so it depends on the working directory.
+    stable_diffusion_config:
+      pretrained_model_name_or_path: sudo-ai/zero123plus-v1.2
+      custom_pipeline: ./zero123plus
+
+# Data module. The train and validation splits use the same dataset class, root
+# directory, metadata file and image directory; only the `validation` flag differs.
+data:
+  target: src.data.objaverse_zero123plus.DataModuleFromConfig
+  params:
+    batch_size: 6
+    num_workers: 8
+    train:
+      target: src.data.objaverse_zero123plus.ObjaverseData
+      params:
+        root_dir: data/objaverse
+        meta_fname: lvis-annotations.json
+        image_dir: rendering_zero123plus
+        validation: false
+    validation:
+      target: src.data.objaverse_zero123plus.ObjaverseData
+      params:
+        root_dir: data/objaverse
+        meta_fname: lvis-annotations.json
+        image_dir: rendering_zero123plus
+        validation: true
+
+
+# Checkpointing and trainer options.
+# Presumably forwarded to PyTorch Lightning's ModelCheckpoint and Trainer — confirm against the training script.
+lightning:
+  modelcheckpoint:
+    params:
+      # Write a checkpoint every 1000 training steps and also keep a "last" checkpoint.
+      every_n_train_steps: 1000
+      save_top_k: -1
+      save_last: true
+  # No additional callbacks are configured.
+  callbacks: {}
+
+  trainer:
+    benchmark: true
+    max_epochs: -1
+    gradient_clip_val: 1.0
+    val_check_interval: 1000
+    num_sanity_val_steps: 0
+    accumulate_grad_batches: 1
+    check_val_every_n_epoch: null   # if this is not set, validation does not run
--- a/InstantMesh/docker/Dockerfile
+++ b/InstantMesh/docker/Dockerfile
+# Start from the NVIDIA CUDA 12.4.1 *runtime* image (Ubuntu 22.04). The CUDA
+# toolkit itself is installed through conda further down.
+FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04
+
+LABEL name="instantmesh" maintainer="instantmesh"
+
+# Add a volume for downloaded models
+# NOTE(review): docker/README.md mounts the model cache at
+# /workspace/instantmesh/models, not /workspace/models — confirm which path
+# is the intended one.
+VOLUME /workspace/models
+
+# Work from /workspace while installing tooling (WORKDIR creates the
+# directory if it does not exist yet).
+WORKDIR /workspace
+
+# Build-time only: keep apt/dpkg from prompting without leaking the setting
+# into the runtime environment of the final container.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install build-essential, git, wget, vim, unzip, libegl1-mesa-dev and
+# libglib2.0-0, and set the timezone. `update` and `install` share one layer
+# so a cached package index can never go stale, and the index is removed in
+# the same layer. ca-certificates is listed explicitly because wget/git only
+# pull it in as a recommended package.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        git \
+        libegl1-mesa-dev \
+        libglib2.0-0 \
+        tzdata \
+        unzip \
+        vim \
+        wget && \
+    ln -fs /usr/share/zoneinfo/America/Chicago /etc/localtime && \
+    dpkg-reconfigure --frontend noninteractive tzdata && \
+    rm -rf /var/lib/apt/lists/*
+
+# install conda
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    chmod +x Miniconda3-latest-Linux-x86_64.sh && \
+    ./Miniconda3-latest-Linux-x86_64.sh -b -p /workspace/miniconda3 && \
+    rm Miniconda3-latest-Linux-x86_64.sh
+
+# update PATH environment variable
+ENV PATH="/workspace/miniconda3/bin:${PATH}"
+
+# initialize conda
+RUN conda init bash
+
+# Create the conda environment. `-y` is required because the build has no
+# terminal to answer the confirmation prompt. The activation line is appended
+# (>>) so the block written by `conda init bash` above is not overwritten.
+RUN conda create -y -n instantmesh python=3.10 && \
+    echo "source activate instantmesh" >> ~/.bashrc
+# Put the environment first on PATH so `python` and `pip` below resolve to it.
+ENV PATH="/workspace/miniconda3/envs/instantmesh/bin:${PATH}"
+
+RUN conda install -y Ninja
+# Install the CUDA toolkit, then drop conda's package cache in the same layer.
+RUN conda install -y cuda -c nvidia/label/cuda-12.4.1 && \
+    conda clean -afy
+
+RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
+RUN pip install --no-cache-dir xformers==0.0.22.post7
+RUN pip install --no-cache-dir triton
+
+# change the working directory to the repository
+WORKDIR /workspace/instantmesh
+
+# Other dependencies. requirements.txt is copied on its own first so this
+# layer stays cached until the requirements themselves change.
+COPY ./requirements.txt /workspace/instantmesh/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /workspace/instantmesh
+
+# docker/README.md publishes port 43839. Gradio takes its default port from
+# GRADIO_SERVER_PORT when launch() is called without an explicit server_port,
+# so pin it here to keep the container and the documented port in agreement.
+ENV GRADIO_SERVER_PORT=43839
+EXPOSE 43839
+
+# Run the command when the container starts
+CMD ["python", "app.py"]
--- a/InstantMesh/docker/README.md
+++ b/InstantMesh/docker/README.md
+# Docker setup
+
+This docker setup is tested on Ubuntu.
+
+Make sure you are in the `yourworkspace/instantmesh/` directory.
+
+Build docker image:
+
+```bash
+docker build -t instantmesh -f docker/Dockerfile .
+```
+
+Run the Docker image with a local model cache (so the container starts faster next time):
+
+```bash
+mkdir -p $HOME/models/
+export MODEL_DIR=$HOME/models/
+
+docker run -it -p 43839:43839 --platform=linux/amd64 --gpus all -v $MODEL_DIR:/workspace/instantmesh/models instantmesh
+```
+
+To use specific GPUs:
+
+```bash
+docker run -it -p 43839:43839 --platform=linux/amd64 --gpus '"device=0,1"' -v $MODEL_DIR:/workspace/instantmesh/models instantmesh
+```
+
+Navigate to `http://localhost:43839` to use the demo.
--- a/InstantMesh/examples/bird.jpg
+++ b/InstantMesh/examples/bird.jpg
--- a/InstantMesh/examples/blue_cat.png
+++ b/InstantMesh/examples/blue_cat.png
--- a/InstantMesh/examples/bubble_mart_blue.png
+++ b/InstantMesh/examples/bubble_mart_blue.png
--- a/InstantMesh/examples/bulldog.png
+++ b/InstantMesh/examples/bulldog.png
--- a/InstantMesh/examples/cake.jpg
+++ b/InstantMesh/examples/cake.jpg