Unverified Commit 57e50f8d authored by Muyang Li's avatar Muyang Li Committed by GitHub

style: upgrade the linter (#339)

* style: reformated codes

* style: reformated codes
parent b737368d
BasedOnStyle: LLVM # K&R / "attach" braces like the code now
IndentWidth: 4 # 4‑space indents everywhere
TabWidth: 4
UseTab: Never # never convert to tabs
ColumnLimit: 120
AccessModifierOffset: -4
BreakBeforeBraces: Attach # `void foo() {` — brace on same line
BraceWrapping:
  AfterNamespace: false # `namespace x {` on same line
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
PointerAlignment: Right # `int *ptr`, `const Foo *bar`
ReferenceAlignment: Pointer # `int &ref` -> same rule as pointers
SortIncludes: false # keep the hand‑crafted include order
IncludeBlocks: Preserve
SortUsingDeclarations: false
IndentPPDirectives: None # keep `#pragma` / `#if` at column 0
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
BinPackParameters: false # one parameter per line (as written)
BinPackArguments: false
AlignAfterOpenBracket: Align # preserve the current hanging‑indent style
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: false
SpaceAfterTemplateKeyword: false
BreakTemplateDeclarations: Yes
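As a rough illustration of the key/value shape of this `.clang-format` file, the top-level scalar settings can be pulled out with a few lines of Python. This is only a sketch: real code should use a YAML parser, and nested keys (e.g. under `BraceWrapping`) are deliberately skipped here.

```python
# Sketch: extract top-level scalar settings from a .clang-format-style file.
# Nested keys and a full YAML grammar are ignored for brevity.
config_text = """
BasedOnStyle: LLVM
IndentWidth: 4
ColumnLimit: 120
PointerAlignment: Right
"""

settings = {}
for raw in config_text.splitlines():
    line = raw.split("#", 1)[0].strip()  # drop trailing comments
    if ":" in line:
        key, _, value = line.partition(":")
        settings[key.strip()] = value.strip()

print(settings["ColumnLimit"])  # 120
```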
......@@ -37,4 +37,3 @@ body:
placeholder: "Example: python run_model.py --config config.json"
validations:
required: true
......@@ -15,4 +15,4 @@
- [ ] [README](../README.md) and example scripts in [`examples`](../examples) are updated if necessary.
- [ ] Throughput/latency benchmarks and quality evaluations are included where applicable.
- [ ] **For reviewers:** If you're only helping merge the main branch and haven't contributed code to this PR, please remove yourself as a co-author when merging.
- [ ] Please feel free to join our [Slack](https://join.slack.com/t/nunchaku/shared_invite/zt-3170agzoz-NgZzWaTrEj~n2KEV3Hpl5Q), [Discord](https://discord.gg/Wk6PnwX9Sm) or [WeChat](https://github.com/mit-han-lab/nunchaku/blob/main/assets/wechat.jpg) to discuss your PR.
......@@ -40,7 +40,7 @@ jobs:
- name: Merge main into dev
id: last_commit
if: steps.check_sync.outputs.skip_merge == 'false'
run: |
# Get author name and email from last commit on main
AUTHOR_NAME=$(git log origin/main -1 --pretty=format:'%an')
AUTHOR_EMAIL=$(git log origin/main -1 --pretty=format:'%ae')
......@@ -48,11 +48,11 @@ jobs:
echo "Author: $AUTHOR_NAME <$AUTHOR_EMAIL>"
echo "Last commit message: $LAST_MSG"
# Set Git user to last author
git config --global user.name "$AUTHOR_NAME"
git config --global user.email "$AUTHOR_EMAIL"
git checkout dev
git merge origin/main -m "[Auto Merge] $LAST_MSG"
git push origin dev
......@@ -94,4 +94,4 @@ jobs:
console.log('Finished processing issues');
}
await processIssues();
......@@ -97,7 +97,7 @@ jobs:
echo "Installing dependencies"
pip install torch torchvision torchaudio
pip install ninja wheel diffusers transformers accelerate sentencepiece protobuf huggingface_hub
build:
needs: set-up-build-env
......
......@@ -97,7 +97,7 @@ jobs:
echo "Installing dependencies"
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
pip install ninja wheel diffusers transformers accelerate sentencepiece protobuf huggingface_hub
build:
needs: set-up-build-env
......
......@@ -39,19 +39,19 @@ jobs:
echo "Skipping [Auto Sync] commit."
exit 0
fi
if [[ "$COMMIT_MSG" == *"[Dont Sync]"* ]]; then
echo "Skipping [Dont Sync] commit."
exit 0
fi
# Preserve original author and amend commit message
GIT_AUTHOR_NAME=$(git log --format='%aN' -n 1 $COMMIT)
GIT_AUTHOR_EMAIL=$(git log --format='%aE' -n 1 $COMMIT)
git config --global user.name "$GIT_AUTHOR_NAME"
git config --global user.email "$GIT_AUTHOR_EMAIL"
NEW_MSG="[Auto Sync] ${COMMIT_MSG}"
PARENTS=$(git rev-list --parents -n 1 $COMMIT)
......
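For illustration, the `[Auto Sync]` / `[Dont Sync]` guard in the workflow above amounts to a small predicate. This is a Python rendering of the two shell checks, not code from the repository:

```python
def should_skip(commit_msg: str) -> bool:
    # Mirrors the workflow's two guards: commits already tagged for
    # sync control are not re-synced.
    return "[Auto Sync]" in commit_msg or "[Dont Sync]" in commit_msg

print(should_skip("[Auto Sync] style: upgrade the linter (#339)"))  # True
print(should_skip("style: upgrade the linter (#339)"))  # False
```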
......@@ -207,4 +207,4 @@ cython_debug/
# Pulid
*.safetensors
*.onnx
.gitattributes
# Adapted from https://github.com/sgl-project/sglang/blob/main/.pre-commit-config.yaml
default_stages: [ pre-commit, pre-push, manual ]
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-symlinks
      - id: destroyed-symlinks
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        args: [ --allow-multiple-documents ]
      - id: check-toml
      - id: check-ast
      - id: check-added-large-files
      - id: check-merge-conflict
      # - id: check-shebang-scripts-are-executable
      - id: detect-private-key
      # - id: debug-statements
      - id: no-commit-to-branch
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.11.2
    hooks:
      - id: ruff
        args: [ --fixable=F401 ]
        files: ^(nunchaku/|examples/|tests/|app/)
        exclude: \.ipynb$
  - repo: https://github.com/psf/black
    rev: 24.10.0
    hooks:
      - id: black-jupyter
      - id: black
        args: [ -l, "120" ]
        files: ^(nunchaku/|examples/|tests/|app/)
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v20.1.3
    hooks:
      - id: clang-format
        types_or: [ c++, cuda ]
        args: [ --style=file, --verbose ]
  - repo: https://github.com/kynan/nbstripout
    rev: 0.8.1
    hooks:
      - id: nbstripout
        args:
          - '--keep-output'
          - '--extra-keys=metadata.kernelspec metadata.language_info.version'
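The `files:` pattern shared by the ruff and black hooks limits them to four top-level directories. A quick sanity check of that regex (copied verbatim from the config; pre-commit matches it against paths relative to the repo root, and the example paths below are illustrative):

```python
import re

# The same anchored pattern used by the ruff/black hooks above.
files_pattern = re.compile(r"^(nunchaku/|examples/|tests/|app/)")

print(bool(files_pattern.search("nunchaku/models/transformer_flux.py")))  # True
print(bool(files_pattern.search("third_party/cutlass/tools/util.py")))    # False
```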
......@@ -198,4 +198,4 @@
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
......@@ -13,4 +13,4 @@ include third_party/Block-Sparse-Attention/LICENSE
include third_party/cutlass/LICENSE.txt
include third_party/json/LICENSE.MIT
include third_party/mio/LICENSE
include third_party/spdlog/LICENSE
......@@ -5,7 +5,7 @@
<a href="http://arxiv.org/abs/2411.05007"><b>Paper</b></a> | <a href="https://hanlab.mit.edu/projects/svdquant"><b>Website</b></a> | <a href="https://hanlab.mit.edu/blog/svdquant"><b>Blog</b></a> | <a href="https://svdquant.mit.edu"><b>Demo</b></a> | <a href="https://huggingface.co/collections/mit-han-lab/svdquant-67493c2c2e62a1fc6e93f45c"><b>HuggingFace</b></a> | <a href="https://modelscope.cn/collections/svdquant-468e8f780c2641"><b>ModelScope</b></a> | <a href="https://github.com/mit-han-lab/ComfyUI-nunchaku"><b>ComfyUI</b></a>
</h3>
<h3 align="center">
<a href="README.md"><b>English</b></a> | <a href="README_ZH.md"><b>中文</b></a>
</h3>
......@@ -53,7 +53,7 @@ https://github.com/user-attachments/assets/fdd4ab68-6489-4c65-8768-259bd866e8f8
#### Quantization Method -- SVDQuant
![intuition](https://huggingface.co/mit-han-lab/nunchaku-artifacts/resolve/main/nunchaku/assets/intuition.gif)Overview of SVDQuant. Stage 1: Originally, both the activation $\boldsymbol{X}$ and weights $\boldsymbol{W}$ contain outliers, making 4-bit quantization challenging. Stage 2: We migrate the outliers from the activations to the weights, resulting in the updated activation $\hat{\boldsymbol{X}}$ and weights $\hat{\boldsymbol{W}}$. While $\hat{\boldsymbol{X}}$ becomes easier to quantize, $\hat{\boldsymbol{W}}$ now becomes more difficult. Stage 3: SVDQuant further decomposes $\hat{\boldsymbol{W}}$ into a low-rank component $\boldsymbol{L}_1\boldsymbol{L}_2$ and a residual $\hat{\boldsymbol{W}}-\boldsymbol{L}_1\boldsymbol{L}_2$ with SVD. Thus, the quantization difficulty is alleviated by the low-rank branch, which runs at 16-bit precision.
#### Nunchaku Engine Design
......@@ -125,7 +125,7 @@ If you're using a Blackwell GPU (e.g., 50-series GPUs), install a wheel with PyT
conda activate nunchaku
pip install torch torchvision torchaudio
pip install ninja wheel diffusers transformers accelerate sentencepiece protobuf huggingface_hub
# For gradio demos
pip install peft opencv-python gradio spaces GPUtil
```
......@@ -144,9 +144,9 @@ If you're using a Blackwell GPU (e.g., 50-series GPUs), install a wheel with PyT
```
For Windows users, you can download and install the latest [Visual Studio](https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community&channel=Release&version=VS2022&source=VSLandingPage&cid=2030&passive=false).
Then build the package from source with
```shell
git clone https://github.com/mit-han-lab/nunchaku.git
cd nunchaku
......@@ -154,13 +154,13 @@ If you're using a Blackwell GPU (e.g., 50-series GPUs), install a wheel with PyT
git submodule update
python setup.py develop
```
If you are building wheels for distribution, use:
```shell
NUNCHAKU_INSTALL_MODE=ALL NUNCHAKU_BUILD_WHEELS=1 python -m build --wheel --no-isolation
```
Make sure to set the environment variable `NUNCHAKU_INSTALL_MODE` to `ALL`. Otherwise, the generated wheels will only work on GPUs with the same architecture as the build machine.
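The build-time effect of this variable can be pictured as follows. The sketch is hypothetical (the actual `setup.py` logic and architecture list are not shown in this diff); it only illustrates why `ALL` matters for distributable wheels:

```python
import os

def cuda_arch_list(install_mode: str) -> list[str]:
    # Hypothetical: "ALL" compiles kernels for every supported GPU
    # architecture; anything else targets only the build machine's GPU,
    # so the resulting wheel runs only on matching hardware.
    if install_mode == "ALL":
        return ["sm_75", "sm_80", "sm_86", "sm_89", "sm_120"]  # illustrative list
    return ["native"]

print(cuda_arch_list(os.environ.get("NUNCHAKU_INSTALL_MODE", "")))
```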
## Usage Example
......@@ -256,7 +256,7 @@ image = pipeline(
image.save(f"flux.1-dev-ghibsky-{precision}.png")
```
To compose multiple LoRAs, use `nunchaku.lora.flux.compose.compose_lora`. The usage is
```python
composed_lora = compose_lora(
......@@ -347,7 +347,7 @@ We thank MIT-IBM Watson AI Lab, MIT and Amazon Science Hub, MIT AI Hardware Prog
We use [img2img-turbo](https://github.com/GaParmar/img2img-turbo) to train the sketch-to-image LoRA. Our text-to-image and image-to-image UIs are built upon [playground-v.25](https://huggingface.co/spaces/playgroundai/playground-v2.5/blob/main/app.py) and [img2img-turbo](https://github.com/GaParmar/img2img-turbo/blob/main/gradio_sketch2image.py), respectively. Our safety checker is borrowed from [hart](https://github.com/mit-han-lab/hart).
Nunchaku is also inspired by many open-source libraries, including (but not limited to) [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), [vLLM](https://github.com/vllm-project/vllm), [QServe](https://github.com/mit-han-lab/qserve), [AWQ](https://github.com/mit-han-lab/llm-awq), [FlashAttention-2](https://github.com/Dao-AILab/flash-attention), and [Atom](https://github.com/efeslab/Atom).
## Star History
......
<div align="center" id="nunchaku_logo">
<img src="assets/nunchaku.svg" alt="logo" width="220"></img>
</div>
<h3 align="center">
<a href="http://arxiv.org/abs/2411.05007"><b>Paper</b></a> | <a href="https://hanlab.mit.edu/projects/svdquant"><b>Website</b></a> | <a href="https://hanlab.mit.edu/blog/svdquant"><b>Blog</b></a> | <a href="https://svdquant.mit.edu"><b>Demo</b></a> | <a href="https://huggingface.co/collections/mit-han-lab/svdquant-67493c2c2e62a1fc6e93f45c"><b>HuggingFace</b></a> | <a href="https://modelscope.cn/collections/svdquant-468e8f780c2641"><b>ModelScope</b></a> | <a href="https://github.com/mit-han-lab/ComfyUI-nunchaku"><b>ComfyUI</b></a>
</h3>
<h3 align="center">
<a href="README.md"><b>English</b></a> | <a href="README_ZH.md"><b>中文</b></a>
</h3>
......@@ -121,9 +121,9 @@ pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.
conda activate nunchaku
pip install torch torchvision torchaudio
pip install ninja wheel diffusers transformers accelerate sentencepiece protobuf huggingface_hub
# Dependencies for the Gradio demos
pip install peft opencv-python gradio spaces GPUtil
```
Blackwell users need to install the PyTorch nightly build (CUDA 12.8):
......@@ -140,7 +140,7 @@ pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.
```
Windows users should install the latest [Visual Studio](https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community&channel=Release&version=VS2022&source=VSLandingPage&cid=2030&passive=false).
Build commands:
```shell
......@@ -150,13 +150,13 @@ pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.
git submodule update
python setup.py develop
```
To build wheels for distribution:
```shell
NUNCHAKU_INSTALL_MODE=ALL NUNCHAKU_BUILD_WHEELS=1 python -m build --wheel --no-isolation
```
Set `NUNCHAKU_INSTALL_MODE=ALL` to ensure the wheels support all GPU architectures.
## Usage Example
......
......@@ -25,4 +25,4 @@ python run_gradio.py
* By default, the model is `FLUX.1-Depth-dev`. You can add `-m canny` to switch to `FLUX.1-Canny-dev`.
* The demo loads the Gemma-2B model as a safety checker by default. To disable this feature, use `--no-safety-checker`.
* To further reduce GPU memory usage, you can enable the W4A16 text encoder by specifying `--use-qencoder`.
* By default, we use our INT4 model. Use `-p bf16` to switch to the BF16 model.
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<div>
<h1>
<img src="https://github.com/mit-han-lab/nunchaku/raw/refs/heads/main/assets/svdquant.svg"
alt="logo"
style="height: 40px; width: auto; display: block; margin: auto;"/>
INT4 FLUX.1-{model_name}-dev Demo
......@@ -49,4 +49,4 @@
</div>
{count_info}
</div>
</div>
......@@ -37,4 +37,4 @@ h1 {
#run_button {
height: 87px;
}
......@@ -10,9 +10,6 @@ from controlnet_aux import CannyDetector
from diffusers import FluxControlPipeline
from image_gen_aux import DepthPreprocessor
from PIL import Image
from nunchaku.models.safety_checker import SafetyChecker
from nunchaku.models.transformers.transformer_flux import NunchakuFluxTransformer2dModel
from utils import get_args
from vars import (
DEFAULT_GUIDANCE_CANNY,
......@@ -28,8 +25,11 @@ from vars import (
WIDTH,
)
from nunchaku.models.safety_checker import SafetyChecker
from nunchaku.models.transformers.transformer_flux import NunchakuFluxTransformer2dModel
# import gradio last to avoid conflicts with other imports
import gradio as gr # noqa: isort: skip
args = get_args()
......@@ -132,7 +132,7 @@ with gr.Blocks(css_paths="assets/style.css", title=f"SVDQuant Flux.1-{model_name
device_info = f"Running on {gpu.name} with {memory:.0f} GiB memory."
else:
device_info = "Running on CPU 🥶 This demo does not work on CPU."
notice = '<strong>Notice:</strong>&nbsp;We will replace unsafe prompts with a default prompt: "A peaceful world."'
def get_header_str():
......
......@@ -10,4 +10,4 @@ python run_gradio.py
* The demo loads the Gemma-2B model as a safety checker by default. To disable this feature, use `--no-safety-checker`.
* To further reduce GPU memory usage, you can enable the W4A16 text encoder by specifying `--use-qencoder`.
* By default, we use our INT4 model. Use `-p bf16` to switch to the BF16 model.