Commit 6c333071 authored by muyangli

[major] support flux.1-tools

parent e9ad0535
@@ -4,6 +4,7 @@ Nunchaku is an inference engine designed for 4-bit diffusion models, as demonstr
 ### [Paper](http://arxiv.org/abs/2411.05007) | [Project](https://hanlab.mit.edu/projects/svdquant) | [Blog](https://hanlab.mit.edu/blog/svdquant) | [Demo](https://svdquant.mit.edu)
+- **[2025-02-04]** **🚀 4-bit [FLUX.1-tools](https://blackforestlabs.ai/flux-1-tools/) is here!** Enjoy a **2-3× speedup** over the original models. Check out the [examples](./examples) for usage. **Gradio demo and ComfyUI integration are coming soon!**
 - **[2025-01-23]** 🚀 **4-bit [SANA](https://nvlabs.github.io/Sana/) support is here!** Experience a 2-3× speedup compared to the 16-bit model. Check out the [usage example](./examples/sana_1600m_pag.py) and the [deployment guide](app/sana/t2i) for more details. Explore our live demo at [svdquant.mit.edu](https://svdquant.mit.edu)!
 - **[2025-01-22]** 🎉 [**SVDQuant**](http://arxiv.org/abs/2411.05007) has been accepted to **ICLR 2025**!
 - **[2024-12-08]** Support [ComfyUI](https://github.com/comfyanonymous/ComfyUI). Please check [comfyui/README.md](comfyui/README.md) for the usage.
@@ -141,7 +142,7 @@ Please refer to [app/flux/t2i/README.md](app/flux/t2i/README.md) for instruction
 - [x] Comfy UI node
 - [ ] Customized LoRA conversion instructions
 - [ ] Customized model quantization instructions
-- [ ] FLUX.1 tools support
+- [x] FLUX.1 tools support
 - [ ] Modularization
 - [ ] IP-Adapter integration
 - [ ] Video Model support
@@ -262,7 +262,7 @@ with gr.Blocks(
         fn=generate_func,
         inputs=input_args,
         outputs=[*image_results, *latency_results],
-        api_name="run",
+        api_name=False,
     )
     randomize_seed.click(
         lambda: random.randint(0, MAX_SEED), inputs=[], outputs=seed, api_name=False, queue=False
     )
@@ -195,7 +195,7 @@ with gr.Blocks(
         fn=generate,
         inputs=input_args,
         outputs=[*image_results, *latency_results],
-        api_name="run",
+        api_name=False,
     )
     randomize_seed.click(
         lambda: random.randint(0, MAX_SEED), inputs=[], outputs=seed, api_name=False, queue=False
     )
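Both Gradio demos get the same one-line change: in Gradio, setting `api_name=False` on an event handler removes it from the app's auto-generated API, so the generate event can now only be triggered from the web UI. This matches the `randomize_seed` handler visible in the surrounding context, which already passes `api_name=False`.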
# New example: FLUX.1-Canny-dev with the 4-bit SVDQuant transformer.
import torch
from controlnet_aux import CannyDetector
from diffusers import FluxControlPipeline
from diffusers.utils import load_image

from nunchaku.models.transformer_flux import NunchakuFluxTransformer2dModel

# Swap the 4-bit quantized transformer into the standard diffusers pipeline.
transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-canny-dev")
pipe = FluxControlPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Canny-dev", transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

# Turn the reference image into a Canny edge map that conditions generation.
processor = CannyDetector()
control_image = processor(
    control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024
)

image = pipe(
    prompt=prompt, control_image=control_image, height=1024, width=1024, num_inference_steps=50, guidance_scale=30.0
).images[0]
image.save("flux.1-canny-dev.png")
# New example: FLUX.1-Depth-dev with the 4-bit SVDQuant transformer.
import torch
from diffusers import FluxControlPipeline
from diffusers.utils import load_image
from image_gen_aux import DepthPreprocessor

from nunchaku.models.transformer_flux import NunchakuFluxTransformer2dModel

transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-depth-dev")
pipe = FluxControlPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Depth-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
).to("cuda")

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

# Estimate a depth map with Depth Anything to condition generation.
processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(control_image)[0].convert("RGB")

image = pipe(
    prompt=prompt, control_image=control_image, height=1024, width=1024, num_inference_steps=30, guidance_scale=10.0
).images[0]
image.save("flux.1-depth-dev.png")
# New example: FLUX.1-Fill-dev (inpainting) with the 4-bit SVDQuant transformer.
import torch
from diffusers import FluxFillPipeline
from diffusers.utils import load_image

from nunchaku.models.transformer_flux import NunchakuFluxTransformer2dModel

# The masked region of `image` is repainted according to the prompt.
image = load_image("https://huggingface.co/mit-han-lab/svdq-int4-flux.1-fill-dev/resolve/main/example.png")
mask = load_image("https://huggingface.co/mit-han-lab/svdq-int4-flux.1-fill-dev/resolve/main/mask.png")

transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-fill-dev")
pipe = FluxFillPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Fill-dev", transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")

image = pipe(
    prompt="A wooden basket of several individual cartons of blueberries.",
    image=image,
    mask_image=mask,
    height=1024,
    width=1024,
    guidance_scale=30,
    num_inference_steps=50,
    max_sequence_length=512,
).images[0]
image.save("flux.1-fill-dev-int4.png")
-__version__ = "0.0.2beta1"
+__version__ = "0.0.2beta2"
@@ -120,6 +120,7 @@ class NunchakuFluxTransformer2dModel(FluxTransformer2DModel, NunchakuModelLoader
         self,
         patch_size: int = 1,
         in_channels: int = 64,
+        out_channels: int | None = None,
         num_layers: int = 19,
         num_single_layers: int = 38,
         attention_head_dim: int = 128,
@@ -132,6 +133,7 @@ class NunchakuFluxTransformer2dModel(FluxTransformer2DModel, NunchakuModelLoader
         super(NunchakuFluxTransformer2dModel, self).__init__(
             patch_size=patch_size,
             in_channels=in_channels,
+            out_channels=out_channels,
             num_layers=0,
             num_single_layers=0,
             attention_head_dim=attention_head_dim,
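Forwarding the new `out_channels` argument to the parent `FluxTransformer2DModel` is presumably what unlocks the FLUX.1-Fill-dev example above, whose transformer consumes more input channels (image plus mask latents) than it emits. In diffusers the argument defaults to `None`, which falls back to `in_channels`, so the text-to-image models are unaffected.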