init

346d2571 · luopl · 346d2571 · 346d2571 · 346d2571 · 346d2571
Commit 346d2571 authored Oct 24, 2024 by luopl
20 changed files
--- a/Dockerfile
+++ b/Dockerfile
+FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.2-py3.10
\ No newline at end of file
--- a/LICENSE
+++ b/LICENSE
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/README.md
+++ b/README.md
+# LinFusion
+## 论文
+LinFusion: 1 GPU, 1 Minute, 16K Image
+- https://arxiv.org/abs/2409.02097
+## 模型结构
+作者将所提出的 Generalized Linear Attention 模块集成到 SD 的架构中，替换原始的 Self-Attention 模块，生成的模型称为 LinFusion。使用知识蒸馏策略，只训练线性注意模块 50K 步，LinFusion 的性能即可与原始 SD 相当甚至更好，同时显著降低了时间和显存占用的复杂度。
+<div align=center>
+    <img src="./assets/linfusin_overview.png"/>
+</div>
+## 算法原理
+为了得到具有线性计算复杂度的 Diffusion Backbone，一个简单的方案是使用 Mamba2 替换所有的 Self-Attention，如图 4 (a) 所示。作者使用双向的 SSM 来确保当前位置可以从后续位置访问信息。SD 中的 Self-Attention 模块不包含 Mamba2 中的门控操作或者 RMS-Norm。作者为了保持一致性，就删除了这些结构，导致性能略有提高。
+<div align=center>
+    <img src="./assets/principle.png"/>
+</div>
+## 环境配置
+### Docker（方法一）
+推荐使用docker方式运行， 此处提供[光源](https://www.sourcefind.cn/#/service-details)拉取docker镜像的地址与使用步骤
+```
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.2-py3.10
+docker run -it --shm-size=1024G -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal:/opt/hyhal --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name linfusion_pytorch  <your IMAGE ID> bash # <your IMAGE ID>为以上拉取的docker的镜像ID替换，本镜像为：4555f389bc2a
+cd /path/your_code_data/
+pip install git+https://github.com/openai/CLIP.git
+pip install click clean-fid open_clip_torch
+```
+Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应。
+### Dockerfile（方法二）
+此处提供dockerfile的使用方法
+```
+docker build -t linfusion:latest .
+docker run -it --shm-size=1024G -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal:/opt/hyhal --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name linfusion_pytorch linfusion bash 
+cd /path/your_code_data/
+pip install git+https://github.com/openai/CLIP.git
+pip install click clean-fid open_clip_torch
+```
+### Anaconda（方法三）
+此处提供本地配置、编译的详细步骤，例如：
+关于本项目DCU显卡所需的特殊深度学习库可从[光合](https://developer.hpccube.com/tool/)开发者社区下载安装。
+```
+DTK驱动:dtk24.04.2
+python:3.10
+torch:2.1.0
+```
+`Tips：以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应`
+其它非深度学习库参照requirements.txt安装：
+```
+cd /path/your_code_data/
+pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/  --trusted-host mirrors.aliyun.com
+pip install git+https://github.com/openai/CLIP.git
+pip install click clean-fid open_clip_torch
+```
+## 数据集
+如果本地没有该数据集，执行训练指令时代码将默认自动将 bhargavsdesai/laion_improved_aesthetics_6.5plus_with_images 数据集下载到 `~/.cache` 目录中，其中包含 169k 张图像，需要约 75 GB 的磁盘空间。
+训练数据集SCNet快速下载链接[bhargavsdesai/laion_improved_aesthetics_6.5plus_with_images](http://113.200.138.88:18080/aidatasets/bhargavsdesai/laion_improved_aesthetics_6.5plus_with_images.git)
+训练数据目录结构如下：
+```
+ ── bhargavsdesai/laion_improved_aesthetics_6.5plus_with_images
+    ├── train-00000-of-00080-b8c547951c435f2e.parquet
+    ├── train-00001-of-00080-6502db8bd493f966.parquet
+    ├── train-00002-of-00080-73d42259ed4d3c6c.parquet
+    └── ...
+```
+验证数据集下载整理如下，也可通过scnet快速下载链接[coco/val2014](http://113.200.138.88:18080/aidatasets/project-dependency/coco2014)下载：
+```
+wget http://images.cocodataset.org/zips/val2014.zip
+unzip val2014.zip -d /path/to/coco
+```
+## 训练
+### 单机单卡
+```
+cd /path/your_code_data/
+bash ./examples/train/train.sh
+```
+### 单机多卡
+```
+bash ./examples/training/distill.sh
+```
+## 推理
+### 单机单卡
+inference:
+```
+cd /path/your_code_data/
+#注意：可修改pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0"为自己的模型路径
+python  examples/inference/sdxl_distrifusion_example.py
+```
+运行examples/eval/singleDCU_eval.sh以生成用于评估的图像。
+```
+#注意：您可能需要指定outdir、repo_id、resolution等
+bash examples/eval/singleDCU_eval.sh
+```
+### 单机多卡
+```
+#其中，--nproc_per_node为使用卡数。
+bash examples/eval/eval.sh
+```
+运行examples/eval/calculate_metrics.sh以计算指标。您可能需要指定/path/to/coco、fake_dir等。
+```
+#运行时会自动下载clip模型，可离线下载openclip模型laion/CLIP-ViT-g-14-laion2B-s12B-b42K
+#同时修改src/eval/calculate_metrics.py中compute_clip_score函数的下述代码行：
+#clip, _, clip_preprocessor = open_clip.create_model_and_transforms("ViT-g-14", pretrained="laion2b_s12b_b42k")中pretrained为你的模型地址
+#例如：pretrained="/data/luopl/LinFusion/laion/CLIP-ViT-g-14-laion2B-s12B-b42K/open_clip_pytorch_model.bin"
+bash examples/eval/calculate_metrics.sh
+```
+## result
+使用的加速卡:4张 K100_AI 
+模型：
+- stabilityai/stable-diffusion-xl-base-1.0
+- Yuanshi/LinFusion-XL
+文生图结果：
+inference:
+<div align=left>
+    <img src="./assets/astronaut.png"/>
+</div>
+### 精度
+使用的加速卡:4张 K100_AI 
+<div align=left>
+    <img src="./assets/acc.png"/>
+</div>
+## 应用场景
+### 算法类别
+`以文生图`
+### 热点应用行业
+`科研,教育,政府,金融`
+## 预训练权重
+[stabilityai/stable-diffusion-v1-5模型下载SCNet链接](http://113.200.138.88:18080/aimodels/stable-diffusion-v1-5)
+[stabilityai/stable-diffusion-2-1模型下载SCNet链接](http://113.200.138.88:18080/aimodels/stable-diffusion-2-1)
+[stabilityai/stable-diffusion-xl-base-1.0模型下载SCNet链接](http://113.200.138.88:18080/aimodels/stable-diffusion-xl-base-1.0)
+[Yuanshi/LinFusion-1-5模型下载SCNet链接](http://113.200.138.88:18080/aimodels/yuanshi/LinFusion-1-5.git)
+[Yuanshi/LinFusion-2-1模型下载SCNet链接](http://113.200.138.88:18080/aimodels/yuanshi/LinFusion-2-1.git)
+[Yuanshi/LinFusion-XL模型下载SCNet链接](http://113.200.138.88:18080/aimodels/yuanshi/LinFusion-XL.git)
+[laion/CLIP-ViT-g-14-laion2B-s12B-b42K模型下载SCNet链接](http://113.200.138.88:18080/aimodels/clip-vit-g-14-laion2b-s12b-b42k)
+## 源码仓库及问题反馈
+- http://developer.hpccube.com/codes/modelzoo/linfusion_pytorch.git
+## 参考资料
+- https://github.com/Huage001/LinFusion/
--- a/README_ori.md
+++ b/README_ori.md
+<div align="center">
+# LinFusion
+<a href="https://arxiv.org/abs/2409.02097"><img src="https://img.shields.io/badge/arXiv-2409.02097-A42C25.svg" alt="arXiv"></a> 
+<a  href="https://lv-linfusion.github.io"><img src="https://img.shields.io/badge/ProjectPage-LinFusion-376ED2#376ED2.svg" alt="Home Page"></a>
+<a href="https://huggingface.co/spaces/Huage001/LinFusion-SD-v1.5"><img src="https://img.shields.io/static/v1?label=HuggingFace&message=gradio demo&color=yellow"></a>
+</div>
+> **LinFusion: 1 GPU, 1 Minute, 16K Image**
+> <br>
+> [Songhua Liu](http://121.37.94.87/), 
+> [Weihao Yu](https://whyu.me/), 
+> [Zhenxiong Tan](https://scholar.google.com/citations?user=HP9Be6UAAAAJ&hl=en), 
+> and 
+> [Xinchao Wang](https://sites.google.com/site/sitexinchaowang/)
+> <br>
+> [Learning and Vision Lab](http://lv-nus.org/), National University of Singapore
+> <br>
+![](./assets/picture.png)
+## 🔥News
+**[2024/09/28]** We release evaluation codes on the COCO benchmark! 
+**[2024/09/27]** We successfully integrate LinFusion to [DistriFusion](https://github.com/mit-han-lab/distrifuser), an effective and efficient strategy for generating an image in parallel, and achieve more significant acceleration! Please refer to the example [here](https://github.com/Huage001/LinFusion/blob/main/examples/inference/sdxl_distrifusion_example.py)!
+**[2024/09/26]** We enable **16K** image generation with merely **24G** video memory! Please refer to the example [here](https://github.com/Huage001/LinFusion/blob/main/examples/inference/superres_sdxl_low_v_mem.ipynb)!
+**[2024/09/20]** We release **a more advanced pipeline for ultra-high-resolution image generation using SD-XL**! It can be used for [text-to-image generation](https://github.com/Huage001/LinFusion/blob/main/examples/inference/ultra_text2image_sdxl.ipynb) and [image super-resolution](https://github.com/Huage001/LinFusion/blob/main/examples/inference/superres_sdxl.ipynb)!
+**[2024/09/20]** We release training codes for Stable Diffusion XL [here](https://github.com/Huage001/LinFusion/blob/main/src/train/distill_xl.py)!
+**[2024/09/13]** We release LinFusion models for Stable Diffusion v-2.1 and Stable Diffusion XL!
+**[2024/09/13]** We release training codes for Stable Diffusion v-1.5, v-2.1, and their variants [here](https://github.com/Huage001/LinFusion/blob/main/src/train/distill.py)!
+**[2024/09/08]** We release codes for **16K** image generation [here](https://github.com/Huage001/LinFusion/blob/main/examples/inference/ultra_text2image_w_sdedit.ipynb)!
+**[2024/09/05]** [Gradio demo](https://huggingface.co/spaces/Huage001/LinFusion-SD-v1.5) for SD-v1.5 is released! Text-to-image, image-to-image, and IP-Adapter are supported currently.
+## Supported Models
+1. `Yuanshi/LinFusion-1-5`: For Stable Diffusion v-1.5 and its variants. <a href="https://huggingface.co/Yuanshi/LinFusion-1-5"><img src="https://img.shields.io/badge/%F0%9F%A4%97-LinFusion for SD v1.5-yellow"></a>
+1. `Yuanshi/LinFusion-2-1`: For Stable Diffusion v-2.1 and its variants. <a href="https://huggingface.co/Yuanshi/LinFusion-2-1"><img src="https://img.shields.io/badge/%F0%9F%A4%97-LinFusion for SD v2.1-yellow"></a>
+1. `Yuanshi/LinFusion-XL`: For Stable Diffusion XL and its variants. <a href="https://huggingface.co/Yuanshi/LinFusion-XL"><img src="https://img.shields.io/badge/%F0%9F%A4%97-LinFusion for SD XL-yellow"></a>
+## Quick Start
+* If you have not, install [PyTorch](https://pytorch.org/get-started/locally/) and [diffusers](https://huggingface.co/docs/diffusers/index).
+* Clone this repo to your project directory:
+  ``` bash
+  git clone https://github.com/Huage001/LinFusion.git
+  ```
+* **You only need two lines!**
+  ```diff
+  from diffusers import AutoPipelineForText2Image
+  import torch
+  + from src.linfusion import LinFusion
+  sd_repo = "Lykon/dreamshaper-8"
+  pipeline = AutoPipelineForText2Image.from_pretrained(
+      sd_repo, torch_dtype=torch.float16, variant="fp16"
+  ).to(torch.device("cuda"))
+  + linfusion = LinFusion.construct_for(pipeline)
+  image = pipeline(
+      "An astronaut floating in space. Beautiful view of the stars and the universe in the background.",
+      generator=torch.manual_seed(123)
+  ).images[0]
+  ```
+  `LinFusion.construct_for(pipeline)` will return a LinFusion model that matches the pipeline's structure. And this LinFusion model will **automatically mount to** the pipeline's forward function.
+* `examples/inference/basic_usage.ipynb` shows a basic text-to-image example.
+## Gradio Demo
+* Currently, you can try LinFusion for SD-v1.5 online [here](https://huggingface.co/spaces/Huage001/LinFusion-SD-v1.5). Text-to-image, image-to-image, and IP-Adapter are supported currently.
+* We are building Gradio local demos for more base models and applications, so that everyone can deploy the demos locally.
+## Ultrahigh-Resolution Generation
+From the perspective of efficiency, our method supports high-resolution generation such as 16K images. Nevertheless, directly applying diffusion models trained on low resolutions for higher-resolution generation can result in content distortion and duplication. To tackle this challenge, we apply the following techniques:
+* [SDEdit](https://huggingface.co/docs/diffusers/v0.30.2/en/api/pipelines/stable_diffusion/img2img#image-to-image). **The basic idea is to generate a low-resolution result at first, based on which we gradually upscale the image.**
+  **Please refer to `examples/inference/ultra_text2image_w_sdedit.ipynb` for an example.**
+* [DemoFusion](https://github.com/PRIS-CV/DemoFusion). It also generates high-resolution images from low-resolution results. Latents of the low-resolution generation are reused for high-resolution generation. Dilated convolutions are involved. Compared with the original version:
+  1. We can generate high-resolution directly with the help of LinFusion instead of using patch-wise strategies. 
+  2. Insights in SDEdit are also applied here, so that the high-resolution branch does not need to go through full denoising steps.
+  3. Images are upscaled to 2x, 4x, 8x, ... resolutions instead of 1x, 2x, 3x, ...
+  **Please refer to `examples/inference/ultra_text2image_sdxl.ipynb` for an example of high-resolution text-to-image generation** (first generate 1024 resolution, then generate 2048, 4096, 8192, etc) **and `examples/inference/superres_sdxl.ipynb` for an example of image super resolution** (directly upscale to the target resolution, generally 2x is recommended and using it multiple times if you want higher scales). 
+* The above code for 16K image generation requires a GPU with 80G video memory. **If you encounter OOM issues, you may consider `examples/inference/superres_sdxl_low_v_mem.ipynb`, which requires only 24G video memory.** We achieve this by 1) chunked forward of classifier-free guidance inference, 2) chunked forward of feed-forward network in Transformer blocks, 3) in-place activation functions in ResNets, and 4) caching UNet residuals on CPU.
+* [DistriFusion](https://github.com/mit-han-lab/distrifuser). Alternatively, if you have multiple GPU cards, you can try integrating LinFusion to DistriFusion, which achieves **more significant acceleration due to its linearity and thus almost constant communication cost**. You can run a minimal example with:
+  ```bash
+  torchrun --nproc_per_node=$N_GPUS -m examples.inference.sdxl_distrifusion_example
+  ```
+* We are working on integrating LinFusion with more advanced approaches that are dedicated to high-resolution extension! **Feel free to create pull requests if you come up with better solutions!**
+## Training
+* Before training, make sure you have the packages shown in `requirements.txt` installed:
+  ```bash
+  pip install -r requirements.txt
+  ```
+* Training codes for Stable Diffusion v-1.5, v-2.1, and their variants are released in `src/train/distill.py`. We present an example running script in `examples/train/distill.sh`. You can run it on an 8-GPU machine via:
+  ```bash
+  bash ./examples/training/distill.sh
+  ```
+  If the `bhargavsdesai/laion_improved_aesthetics_6.5plus_with_images` [dataset](https://huggingface.co/datasets/bhargavsdesai/laion_improved_aesthetics_6.5plus_with_images) is not already present, the code will download it automatically to the `~/.cache` directory by default; it contains 169k images and requires ~75 GB of disk space.
+  We use fp16 precision and 512 resolution for Stable Diffusion v-1.5 and bf16 precision and 768 resolution for Stable Diffusion v-2.1.
+* Training codes for Stable Diffusion XL are released in `src/train/distill_xl.py`. We present an example running script in `examples/train/distill_xl.sh`. You can run it on an 8-GPU machine via:
+  ```bash
+  bash ./examples/training/distill_xl.sh
+  ```
+## Evaluation
+Following [GigaGAN](https://github.com/mingukkang/GigaGAN/tree/main/evaluation), we use 30,000 COCO captions to generate 30,000 images for evaluation. FID against COCO val2014 is reported as a metric, and CLIP text cosine similarity is used to reflect the text-image alignment.
+* To evaluate LinFusion, first install the required packages:
+  ```bash
+  pip install git+https://github.com/openai/CLIP.git
+  pip install click clean-fid open_clip_torch
+  ```
+* Download and unzip COCO val2014 to `/path/to/coco`:
+  ```bash
+  wget http://images.cocodataset.org/zips/val2014.zip
+  unzip val2014.zip -d /path/to/coco
+  ```
+* Run `examples/eval/eval.sh` to generate images for evaluation. You may need to specify `outdir`, `repo_id`, `resolution`, etc.
+  ```bash
+  bash examples/eval/eval.sh
+  ```
+* Run `examples/eval/calculate_metrics.sh` to calculate the metrics. You may need to specify `/path/to/coco`, `fake_dir`, etc.
+  ```bash
+  bash examples/eval/calculate_metrics.sh
+  ```
+## ToDo
+- [x] Stable Diffusion 1.5 support.
+- [x] Stable Diffusion 2.1 support. 
+- [x] Stable Diffusion XL support.
+- [x] Release training code for LinFusion.
+- [x] Release evaluation code for LinFusion.
+- [ ] Gradio local interface.
+## Acknowledgement
+* We extend our gratitude to the authors of [SDEdit](https://huggingface.co/docs/diffusers/v0.30.2/en/api/pipelines/stable_diffusion/img2img#image-to-image), [DemoFusion](https://github.com/PRIS-CV/DemoFusion), and [DistriFusion](https://github.com/mit-han-lab/distrifuser) for their contributions, which inspire us a lot on applying LinFusion for high-resolution generation. 
+* Our evaluation codes are adapted from [SiD-LSG](https://github.com/mingyuanzhou/SiD-LSG) and [GigaGAN](https://github.com/mingukkang/GigaGAN/tree/main/evaluation).
+* We thank [@Adamdad](https://github.com/Adamdad), [@yu-rp](https://github.com/yu-rp), and [@czg1225](https://github.com/czg1225) for valuable discussions.
+## Citation
+If you find this repo helpful, please consider citing:
+```bib
+@article{liu2024linfusion,
+  title     = {LinFusion: 1 GPU, 1 Minute, 16K Image},
+  author    = {Liu, Songhua and Yu, Weihao and Tan, Zhenxiong and Wang, Xinchao},
+  year      = {2024},
+  eprint    = {2409.02097},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}
+```
--- a/assets/000000.png
+++ b/assets/000000.png
--- a/assets/acc.png
+++ b/assets/acc.png
--- a/assets/astronaut.png
+++ b/assets/astronaut.png
--- a/assets/captions.txt
+++ b/assets/captions.txt
--- a/assets/diffusers_linfusion_example.jpg
+++ b/assets/diffusers_linfusion_example.jpg
--- a/assets/laion_improved_aesthetics_6.5plus_with_images_blip_captions.json
+++ b/assets/laion_improved_aesthetics_6.5plus_with_images_blip_captions.json
--- a/assets/linfusin_overview.png
+++ b/assets/linfusin_overview.png
--- a/assets/picture.png
+++ b/assets/picture.png
--- a/assets/principle.png
+++ b/assets/principle.png
--- a/examples/eval/calculate_metrics.sh
+++ b/examples/eval/calculate_metrics.sh
+how_many=30000
+ref_data="coco2014"
+ref_dir="/path/to/coco/"
+ref_type="val2014"
+eval_res=256
+batch_size=128
+clip_model="ViT-G/14"
+caption_file='assets/captions.txt'
+fake_dir='eval_results/sdxl'
+python -m src.eval.calculate_metrics --how_many $how_many --ref_data $ref_data --ref_dir $ref_dir --ref_type $ref_type --fake_dir $fake_dir --eval_res $eval_res --batch_size $batch_size --clip_model $clip_model --caption_file $caption_file
--- a/examples/eval/eval.sh
+++ b/examples/eval/eval.sh
+torchrun --standalone --nproc_per_node=4 -m src.eval.eval \
+    --outdir='eval_results/sd21' \
+    --seeds=0-29999 \
+    --batch=4 \
+    --repo_id='stabilityai/stable-diffusion-2-1' \
+    --resolution=512 \
+    --guidance_scale=7.5
\ No newline at end of file
--- a/examples/eval/singleDCU_eval.sh
+++ b/examples/eval/singleDCU_eval.sh
+python -m src.eval.eval \
+    --outdir='eval_results/sd21' \
+    --seeds=0-29999 \
+    --batch=4 \
+    --repo_id='stabilityai/stable-diffusion-2-1' \
+    --resolution=512 \
+    --guidance_scale=7.5
\ No newline at end of file
--- a/examples/inference/basic_usage.ipynb
+++ b/examples/inference/basic_usage.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from diffusers import AutoPipelineForText2Image\n",
+    "import torch\n",
+    "\n",
+    "from src.linfusion import LinFusion\n",
+    "from src.tools import seed_everything"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sd_repo = \"Lykon/dreamshaper-8\"\n",
+    "\n",
+    "pipeline = AutoPipelineForText2Image.from_pretrained(\n",
+    "    sd_repo, torch_dtype=torch.float16, variant=\"fp16\"\n",
+    ").to(torch.device(\"cuda\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "linfusion = LinFusion.construct_for(pipeline, pretrained_model_name_or_path=\"Yuanshi/LinFusion-1-5\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seed_everything(123)\n",
+    "image = pipeline(\n",
+    "\t\"An astronaut floating in space. Beautiful view of the stars and the universe in the background.\"\n",
+    ").images[0]\n",
+    "image"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "new_vc",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/examples/inference/basic_usage_sd2.ipynb
+++ b/examples/inference/basic_usage_sd2.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from diffusers import AutoPipelineForText2Image\n",
+    "import torch\n",
+    "\n",
+    "from src.linfusion import LinFusion\n",
+    "from src.tools import seed_everything"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sd_repo = \"stabilityai/stable-diffusion-2-1\"\n",
+    "\n",
+    "pipeline = AutoPipelineForText2Image.from_pretrained(\n",
+    "    sd_repo, torch_dtype=torch.bfloat16, variant=\"fp16\"\n",
+    ").to(torch.device(\"cuda\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "linfusion = LinFusion.construct_for(pipeline, pretrained_model_name_or_path=\"Yuanshi/LinFusion-2-1\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seed_everything(123)\n",
+    "image = pipeline(\n",
+    "\t\"An astronaut floating in space. Beautiful view of the stars and the universe in the background.\"\n",
+    ").images[0]\n",
+    "image"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "new_vc",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/examples/inference/basic_usage_sdxl.ipynb
+++ b/examples/inference/basic_usage_sdxl.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from diffusers import AutoPipelineForText2Image\n",
+    "import torch\n",
+    "\n",
+    "from src.linfusion import LinFusion\n",
+    "from src.tools import seed_everything"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sd_repo = \"stabilityai/stable-diffusion-xl-base-1.0\"\n",
+    "\n",
+    "pipeline = AutoPipelineForText2Image.from_pretrained(\n",
+    "    sd_repo, torch_dtype=torch.float16, variant=\"fp16\"\n",
+    ").to(torch.device(\"cuda\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "linfusion = LinFusion.construct_for(pipeline, pretrained_model_name_or_path=\"Yuanshi/LinFusion-XL\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seed_everything(123)\n",
+    "image = pipeline(\n",
+    "\t\"An astronaut floating in space. Beautiful view of the stars and the universe in the background.\"\n",
+    ").images[0]\n",
+    "image"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "new_vc",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/examples/inference/sdxl_distrifusion_example.py
+++ b/examples/inference/sdxl_distrifusion_example.py
+import torch
+from src.pipelines.pipelines_distrifusion_sdxl import DistriSDXLPipeline
+from src.distrifuser.utils import DistriConfig
+distri_config = DistriConfig(height=1024, width=1024, warmup_steps=4)
+pipeline = DistriSDXLPipeline.from_pretrained(
+    distri_config=distri_config,
+    pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0",
+    variant="fp16",
+    use_safetensors=True,
+)
+pipeline.set_progress_bar_config(disable=distri_config.rank != 0)
+image = pipeline(
+    prompt="Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
+    generator=torch.Generator(device="cuda").manual_seed(233)
+).images[0]
+if distri_config.rank == 0:
+    image.save("astronaut.png")