Commit 4b9c2e03 authored by muyangli

[major] auto lora format detection; upgrade to 0.1.3

parent bafad50d
@@ -45,16 +45,16 @@ SVDQuant is a post-training quantization technique for 4-bit weights and activat
### Wheels (Linux only for now)
Before installation, ensure you have [PyTorch>=2.5](https://pytorch.org/) installed. For example, you can use the following command to install PyTorch 2.6:
```shell
pip install torch==2.6 torchvision==0.21 torchaudio==2.6
```
Once PyTorch is installed, you can directly install `nunchaku` from our [Hugging Face repository](https://huggingface.co/mit-han-lab/nunchaku/tree/main). Be sure to select the appropriate wheel for your Python and PyTorch version. For example, for Python 3.11 and PyTorch 2.6:
```shell
pip install https://huggingface.co/mit-han-lab/nunchaku/resolve/main/nunchaku-0.1.3+torch2.6-cp311-cp311-linux_x86_64.whl
```
**Note**: NVFP4 wheels are not currently available because PyTorch does not yet officially support CUDA 12.8. To use NVFP4, you will need **Blackwell GPUs (e.g., 50-series GPUs)** and must **build from source**.
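The wheel filename above encodes the package version, the PyTorch major.minor version, and the Python ABI tag. As a rough sketch of how the pieces fit together (the pattern is taken from the example URL above; `build_wheel_name` is an illustrative helper, not part of `nunchaku`):

```python
# Sketch: derive the expected wheel filename from a torch version string
# and the interpreter version. The naming pattern mirrors the example
# wheel above; this helper is hypothetical, not part of nunchaku.
def build_wheel_name(nunchaku_version: str, torch_version: str,
                     py_major: int, py_minor: int) -> str:
    # "2.6.0+cu124" -> "2.6": drop the local part, keep major.minor
    torch_mm = ".".join(torch_version.split("+")[0].split(".")[:2])
    cp_tag = f"cp{py_major}{py_minor}"  # e.g. cp311 for Python 3.11
    return f"nunchaku-{nunchaku_version}+torch{torch_mm}-{cp_tag}-{cp_tag}-linux_x86_64.whl"

print(build_wheel_name("0.1.3", "2.6.0+cu124", 3, 11))
# nunchaku-0.1.3+torch2.6-cp311-cp311-linux_x86_64.whl
```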
@@ -133,7 +133,6 @@ Specifically, `nunchaku` shares the same APIs as [diffusers](https://github.com/
python -m nunchaku.lora.flux.convert \
    --quant-path mit-han-lab/svdq-int4-flux.1-dev/transformer_blocks.safetensors \
    --lora-path aleksa-codes/flux-ghibsky-illustration/lora.safetensors \
    --output-root ./nunchaku_loras \
    --lora-name svdq-int4-flux.1-dev-ghibsky
```
@@ -145,6 +144,7 @@ Argument Details:
- `--lora-path`: The path to your LoRA safetensors, which can also be a local or remote Hugging Face model.
- `--lora-format`: Specifies the LoRA format. Supported formats include:
  - `auto`: The default option. Automatically detects the appropriate LoRA format.
  - `diffusers` (e.g., [aleksa-codes/flux-ghibsky-illustration](https://huggingface.co/aleksa-codes/flux-ghibsky-illustration))
  - `comfyui` (e.g., [Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch](https://huggingface.co/Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch))
  - `xlab` (e.g., [XLabs-AI/flux-RealismLora](https://huggingface.co/XLabs-AI/flux-RealismLora))
...
@@ -3,7 +3,11 @@
![comfyui](../assets/comfyui.jpg)
## Installation
Please first install `nunchaku` following the instructions in [README.md](https://github.com/mit-han-lab/nunchaku?tab=readme-ov-file#installation). Then install `image_gen_aux` with:
```shell
pip install git+https://github.com/asomoza/image_gen_aux.git
```
### ComfyUI-CLI
@@ -102,11 +106,11 @@ comfy node registry-install svdquant
* Place your LoRA checkpoints in the `models/loras` directory. These will appear as selectable options under `lora_name`. The [example Ghibsky LoRA](https://huggingface.co/aleksa-codes/flux-ghibsky-illustration) is also included and will be downloaded automatically from our Hugging Face repository when used.
* `lora_format` specifies the LoRA format. Supported formats include:
  * `auto`: Automatically detects the appropriate LoRA format.
  * `diffusers` (e.g., [aleksa-codes/flux-ghibsky-illustration](https://huggingface.co/aleksa-codes/flux-ghibsky-illustration))
  * `comfyui` (e.g., [Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch](https://huggingface.co/Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch))
  * `xlab` (e.g., [XLabs-AI/flux-RealismLora](https://huggingface.co/XLabs-AI/flux-RealismLora))
  * `svdquant` (e.g., [mit-han-lab/svdquant-lora-collection](https://huggingface.co/mit-han-lab/svdquant-lora-collection))
* `base_model_name` specifies the path to the quantized base model. If `lora_format` is set to `svdquant`, this option is ignored. Otherwise, you can set it to the same value as `model_path` in the **SVDQuant Flux DiT Loader** above.
* **Note**: Currently, **only one LoRA** can be loaded at a time.
...
@@ -6,6 +6,7 @@ from safetensors.torch import save_file
from nunchaku.lora.flux.comfyui_converter import comfyui2diffusers
from nunchaku.lora.flux.diffusers_converter import convert_to_nunchaku_flux_lowrank_dict
from nunchaku.lora.flux.utils import detect_format
from nunchaku.lora.flux.xlab_converter import xlab2diffusers
@@ -43,7 +44,10 @@ class SVDQuantFluxLoraLoader:
"required": {
    "model": ("MODEL", {"tooltip": "The diffusion model the LoRA will be applied to."}),
    "lora_name": (lora_name_list, {"tooltip": "The name of the LoRA."}),
    "lora_format": (
        ["auto", "comfyui", "diffusers", "svdquant", "xlab"],
        {"tooltip": "The format of the LoRA."},
    ),
    "base_model_name": (
        base_model_paths,
        {
@@ -89,6 +93,8 @@ class SVDQuantFluxLoraLoader:
    lora_path = folder_paths.get_full_path_or_raise("loras", lora_name)
except FileNotFoundError:
    lora_path = lora_name
if lora_format == "auto":
    lora_format = detect_format(lora_path)
if lora_format != "svdquant":
    if lora_format == "comfyui":
        input_lora = comfyui2diffusers(lora_path)
...
[project]
name = "svdquant"
description = "SVDQuant ComfyUI Node. SVDQuant is a new post-training quantization paradigm for diffusion models, which quantizes both the weights and activations of FLUX.1 to 4 bits, achieving 3.5× memory and 8.7× latency reduction on a 16GB laptop 4090 GPU."
version = "0.1.3"
license = { file = "LICENSE.txt" }
dependencies = []
requires-python = ">=3.11, <3.13"
...
@@ -4,4 +4,3 @@ accelerate
sentencepiece
protobuf
huggingface_hub
@@ -534,7 +534,7 @@
},
"widgets_values": [
    "aleksa-codes/flux-ghibsky-illustration/lora.safetensors",
    "auto",
    "mit-han-lab/svdq-int4-flux.1-dev",
    1
]
...
__version__ = "0.1.3"
@@ -17,10 +17,9 @@ def comfyui2diffusers(
tensors = input_lora
new_tensors = {}
max_rank = 0
for k, v in tensors.items():
    if "alpha" in k:
        continue
    new_k = k.replace("lora_down", "lora_A").replace("lora_up", "lora_B")
    if "lora_unet_double_blocks_" in k:
@@ -31,7 +30,10 @@ def comfyui2diffusers(
# Copy the tensor
new_k = new_k.replace("_img_attn_qkv", f".attn.to_{p}")
new_k = new_k.replace("_txt_attn_qkv", f".attn.add_{p}_proj")
rank = v.shape[0]
alpha = tensors[k.replace("lora_down.weight", "alpha")]
new_tensors[new_k] = v.clone() * alpha / rank
max_rank = max(max_rank, rank)
else:
    assert "lora_B" in new_k
    assert v.shape[0] % 3 == 0
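The `v.shape[0] % 3 == 0` assertion reflects that ComfyUI-style checkpoints store a fused QKV `lora_B` whose output rows stack the q, k, and v projections. The splitting idea can be sketched in plain Python (a stand-in for the tensor chunking, not the converter's actual code):

```python
# Sketch: a fused QKV LoRA up-projection has out_dim = 3 * per-projection
# rows (q, k, v stacked). Splitting into three equal chunks mirrors the
# converter's assumption that v.shape[0] % 3 == 0.
def split_qkv(rows):
    assert len(rows) % 3 == 0, "fused QKV out-dim must be divisible by 3"
    third = len(rows) // 3
    return rows[:third], rows[third:2 * third], rows[2 * third:]

q, k, v = split_qkv([[1], [2], [3], [4], [5], [6]])
print(q, k, v)  # [[1], [2]] [[3], [4]] [[5], [6]]
```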
@@ -60,7 +62,10 @@ def comfyui2diffusers(
        new_k1 = new_k.replace("_linear1", ".proj_mlp")
    else:
        new_k1 = new_k.replace("_linear1", f".attn.to_{p}")
    rank = v.shape[0]
    alpha = tensors[k.replace("lora_down.weight", "alpha")]
    new_tensors[new_k1] = v.clone() * alpha / rank
    max_rank = max(max_rank, rank)
else:
    if p == "i":
        new_k1 = new_k.replace("_linear1", ".proj_mlp")
@@ -72,6 +77,11 @@ def comfyui2diffusers(
    else:
        new_k = new_k.replace("_linear2", ".proj_out")
    new_k = new_k.replace("_modulation_lin", ".norm.linear")
    if "lora_down" in k:
        rank = v.shape[0]
        alpha = tensors[k.replace("lora_down.weight", "alpha")]
        v = v * alpha / rank
        max_rank = max(max_rank, rank)
    new_tensors[new_k] = v
if min_rank is not None:
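The commit folds the LoRA `alpha / rank` scale directly into the down-projection weights instead of tracking `max_alpha` separately. The identity this relies on, `up @ (down * alpha/rank) == (alpha/rank) * (up @ down)`, can be checked with a minimal pure-Python sketch (toy 2x2 matrices, not the converter's code):

```python
# Sketch: folding alpha/rank into lora_down (the A matrix) leaves the
# effective LoRA delta -- (alpha/rank) * (up @ down) -- unchanged,
# because matrix multiplication commutes with scalar scaling.
def matmul(a, b):
    return [[sum(x * y for x, y in zip(row, col)) for col in zip(*b)] for row in a]

def scale(m, s):
    return [[x * s for x in row] for row in m]

up = [[1.0, 2.0], [3.0, 4.0]]    # lora_B / lora_up, shape (out, rank)
down = [[5.0, 6.0], [7.0, 8.0]]  # lora_A / lora_down, shape (rank, in)
alpha, rank = 8.0, 2             # rank = row count of `down`

delta_explicit = scale(matmul(up, down), alpha / rank)
delta_folded = matmul(up, scale(down, alpha / rank))
print(delta_explicit == delta_folded)  # True
```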
...
@@ -6,6 +6,7 @@ from safetensors.torch import save_file
from .comfyui_converter import comfyui2diffusers
from .diffusers_converter import convert_to_nunchaku_flux_lowrank_dict
from .utils import detect_format
from .xlab_converter import xlab2diffusers
from ...utils import filter_state_dict, load_state_dict_in_safetensors
@@ -21,8 +22,8 @@ if __name__ == "__main__":
parser.add_argument(
    "--lora-format",
    type=str,
    default="auto",
    choices=["auto", "comfyui", "diffusers", "xlab"],
    help="format of the LoRA weights",
)
parser.add_argument("--output-root", type=str, default="", help="root to the output safetensor file")
@@ -53,6 +54,13 @@ if __name__ == "__main__":
orig_state_dict = load_state_dict_in_safetensors(args.quant_path)

lora_format = args.lora_format
if lora_format == "auto":
    lora_format = detect_format(args.lora_path)
    print(f"Detected LoRA format: {lora_format}")
if lora_format == "svdquant":
    print("Already in SVDQuant format, no conversion needed.")
    exit(0)
if lora_format == "diffusers":
    extra_lora_dict = load_state_dict_in_safetensors(args.lora_path)
else:
...
import torch

from ...utils import load_state_dict_in_safetensors


def detect_format(lora: str | dict[str, torch.Tensor]) -> str:
    if isinstance(lora, str):
        tensors = load_state_dict_in_safetensors(lora, device="cpu")
    else:
        tensors = lora
    for k in tensors.keys():
        if "lora_unet_double_blocks_" in k or "lora_unet_single_blocks" in k:
            return "comfyui"
        elif "mlp_fc" in k or "mlp_context_fc1" in k:
            return "svdquant"
        elif "double_blocks." in k or "single_blocks." in k:
            return "xlab"
        elif "transformer." in k:
            return "diffusers"
    raise ValueError("Unknown format, please provide the format explicitly.")
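The detection heuristic only inspects key names, never weight values, so it can be exercised without loading any safetensors. A self-contained restatement of the same substring checks (dummy key lists; mirrors `detect_format` above, independent of torch):

```python
# Sketch: stand-alone restatement of detect_format's key-substring
# heuristic, runnable without safetensors or torch. Order matters:
# ComfyUI keys also contain "double_blocks", so the ComfyUI check
# must come before the xlab check.
def detect_format_from_keys(keys):
    for k in keys:
        if "lora_unet_double_blocks_" in k or "lora_unet_single_blocks" in k:
            return "comfyui"
        elif "mlp_fc" in k or "mlp_context_fc1" in k:
            return "svdquant"
        elif "double_blocks." in k or "single_blocks." in k:
            return "xlab"
        elif "transformer." in k:
            return "diffusers"
    raise ValueError("Unknown format, please provide the format explicitly.")

print(detect_format_from_keys(["lora_unet_double_blocks_0.lora_down.weight"]))  # comfyui
print(detect_format_from_keys(["transformer.blocks.0.lora_A.weight"]))          # diffusers
```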
@@ -62,6 +62,10 @@ if __name__ == "__main__":
fp = open("nunchaku/__version__.py", "r").read()
version = eval(fp.strip().split()[-1])
torch_version = torch.__version__.split("+")[0]
torch_major_minor_version = ".".join(torch_version.split(".")[:2])
version = version + "+torch" + torch_major_minor_version

ROOT_DIR = os.path.dirname(__file__)
INCLUDE_DIRS = [
...
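The setup change above appends a local version label derived from the installed PyTorch, e.g. `0.1.3+torch2.6`, which is what makes the per-torch wheel names possible. The string manipulation can be verified in isolation (with a stand-in for `torch.__version__`, so torch itself is not needed):

```python
# Sketch: reproduce the setup.py version-tagging logic with a stand-in
# torch version string instead of importing torch.
def tag_version(version: str, torch_dunder_version: str) -> str:
    torch_version = torch_dunder_version.split("+")[0]  # drop local part, e.g. "+cu124"
    torch_major_minor = ".".join(torch_version.split(".")[:2])
    return version + "+torch" + torch_major_minor

print(tag_version("0.1.3", "2.6.0+cu124"))  # 0.1.3+torch2.6
```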