Unverified Commit ad8097b9 authored by Muyang Li, committed by GitHub

Release v0.2.0

Ready to release v0.2.0
parents 804a6d30 998192ca
{
"last_node_id": 45,
"last_link_id": 88,
"nodes": [
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
307,
282
],
"size": [
425.27801513671875,
180.6060791015625
],
"flags": {
"collapsed": true
},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "clip",
"localized_name": "clip",
"label": "clip",
"type": "CLIP",
"link": 63
}
],
"outputs": [
{
"name": "CONDITIONING",
"localized_name": "CONDITIONING",
"label": "CONDITIONING",
"type": "CONDITIONING",
"links": [
68
],
"slot_index": 0
}
],
"title": "CLIP Text Encode (Negative Prompt)",
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 34,
"type": "DualCLIPLoader",
"pos": [
-238,
112
],
"size": [
315,
122
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"localized_name": "CLIP",
"label": "CLIP",
"type": "CLIP",
"links": [
62,
63
]
}
],
"properties": {
"Node name for S&R": "DualCLIPLoader"
},
"widgets_values": [
"clip_l.safetensors",
"t5xxl_fp16.safetensors",
"flux",
"default"
]
},
{
"id": 26,
"type": "FluxGuidance",
"pos": [
621,
8
],
"size": [
317.4000244140625,
58
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"localized_name": "conditioning",
"label": "conditioning",
"type": "CONDITIONING",
"link": 41
}
],
"outputs": [
{
"name": "CONDITIONING",
"localized_name": "CONDITIONING",
"label": "CONDITIONING",
"type": "CONDITIONING",
"shape": 3,
"links": [
67
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "FluxGuidance"
},
"widgets_values": [
10
]
},
{
"id": 3,
"type": "KSampler",
"pos": [
1280,
100
],
"size": [
315,
262
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "model",
"localized_name": "model",
"label": "model",
"type": "MODEL",
"link": 78
},
{
"name": "positive",
"localized_name": "positive",
"label": "positive",
"type": "CONDITIONING",
"link": 64
},
{
"name": "negative",
"localized_name": "negative",
"label": "negative",
"type": "CONDITIONING",
"link": 65
},
{
"name": "latent_image",
"localized_name": "latent_image",
"label": "latent_image",
"type": "LATENT",
"link": 73
}
],
"outputs": [
{
"name": "LATENT",
"localized_name": "LATENT",
"label": "LATENT",
"type": "LATENT",
"links": [
7
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KSampler"
},
"widgets_values": [
69796511068157,
"randomize",
20,
1,
"euler",
"normal",
1
]
},
{
"id": 43,
"type": "PreviewImage",
"pos": [
1001.3873291015625,
432.09039306640625
],
"size": [
571.5869140625,
625.5296020507812
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "images",
"localized_name": "images",
"label": "images",
"type": "IMAGE",
"link": 87
}
],
"outputs": [],
"properties": {
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1620,
98
],
"size": [
210,
46
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "samples",
"localized_name": "samples",
"label": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"localized_name": "vae",
"label": "vae",
"type": "VAE",
"link": 60
}
],
"outputs": [
{
"name": "IMAGE",
"localized_name": "IMAGE",
"label": "IMAGE",
"type": "IMAGE",
"links": [
85
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 44,
"type": "SaveImage",
"pos": [
1912.7984619140625,
109.0069580078125
],
"size": [
828.9535522460938,
893.8475341796875
],
"flags": {},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "images",
"localized_name": "images",
"label": "images",
"type": "IMAGE",
"link": 85
}
],
"outputs": [],
"properties": {},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 42,
"type": "ImageScale",
"pos": [
174.98765563964844,
450.5818786621094
],
"size": [
315,
130
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "image",
"localized_name": "image",
"type": "IMAGE",
"link": 82
}
],
"outputs": [
{
"name": "IMAGE",
"localized_name": "IMAGE",
"type": "IMAGE",
"links": [
86
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "ImageScale"
},
"widgets_values": [
"nearest-exact",
1024,
1024,
"center"
]
},
{
"id": 23,
"type": "CLIPTextEncode",
"pos": [
115,
-17
],
"size": [
422.84503173828125,
164.31304931640625
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"localized_name": "clip",
"label": "clip",
"type": "CLIP",
"link": 62
}
],
"outputs": [
{
"name": "CONDITIONING",
"localized_name": "CONDITIONING",
"label": "CONDITIONING",
"type": "CONDITIONING",
"links": [
41
],
"slot_index": 0
}
],
"title": "CLIP Text Encode (Positive Prompt)",
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"ethereal fantasy concept art of A logo of 'MIT HAN Lab'. magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 32,
"type": "VAELoader",
"pos": [
630.5574951171875,
280.441650390625
],
"size": [
315,
58
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"localized_name": "VAE",
"label": "VAE",
"type": "VAE",
"links": [
60,
69
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"ae.safetensors"
]
},
{
"id": 35,
"type": "InstructPixToPixConditioning",
"pos": [
1008,
118
],
"size": [
235.1999969482422,
86
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "positive",
"localized_name": "positive",
"label": "positive",
"type": "CONDITIONING",
"link": 67
},
{
"name": "negative",
"localized_name": "negative",
"label": "negative",
"type": "CONDITIONING",
"link": 68
},
{
"name": "vae",
"localized_name": "vae",
"label": "vae",
"type": "VAE",
"link": 69
},
{
"name": "pixels",
"localized_name": "pixels",
"label": "pixels",
"type": "IMAGE",
"link": 88
}
],
"outputs": [
{
"name": "positive",
"localized_name": "positive",
"label": "positive",
"type": "CONDITIONING",
"links": [
64
],
"slot_index": 0
},
{
"name": "negative",
"localized_name": "negative",
"label": "negative",
"type": "CONDITIONING",
"links": [
65
],
"slot_index": 1
},
{
"name": "latent",
"localized_name": "latent",
"label": "latent",
"type": "LATENT",
"links": [
73
],
"slot_index": 2
}
],
"properties": {
"Node name for S&R": "InstructPixToPixConditioning"
},
"widgets_values": []
},
{
"id": 45,
"type": "SVDQuantDepthPreprocessor",
"pos": [
620.8667602539062,
445.8307189941406
],
"size": [
315,
58
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "image",
"localized_name": "image",
"type": "IMAGE",
"link": 86
}
],
"outputs": [
{
"name": "IMAGE",
"localized_name": "IMAGE",
"type": "IMAGE",
"links": [
87,
88
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "FluxDepthPreprocessor"
},
"widgets_values": [
"LiheYoung/depth-anything-large-hf"
]
},
{
"id": 17,
"type": "LoadImage",
"pos": [
-152.99026489257812,
409.8635559082031
],
"size": [
315,
314.0000305175781
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"localized_name": "IMAGE",
"label": "IMAGE",
"type": "IMAGE",
"shape": 3,
"links": [
82
],
"slot_index": 0
},
{
"name": "MASK",
"localized_name": "MASK",
"label": "MASK",
"type": "MASK",
"shape": 3,
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"logo_example.png",
"image"
]
},
{
"id": 39,
"type": "SVDQuantFluxDiTLoader",
"pos": [
707.80908203125,
-172.0343017578125
],
"size": [
315,
106
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"localized_name": "MODEL",
"type": "MODEL",
"links": [
78
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "SVDQuantFluxDiTLoader"
},
"widgets_values": [
"mit-han-lab/svdq-int4-flux.1-depth-dev",
"disable",
0
]
}
],
"links": [
[
7,
3,
0,
8,
0,
"LATENT"
],
[
41,
23,
0,
26,
0,
"CONDITIONING"
],
[
60,
32,
0,
8,
1,
"VAE"
],
[
62,
34,
0,
23,
0,
"CLIP"
],
[
63,
34,
0,
7,
0,
"CLIP"
],
[
64,
35,
0,
3,
1,
"CONDITIONING"
],
[
65,
35,
1,
3,
2,
"CONDITIONING"
],
[
67,
26,
0,
35,
0,
"CONDITIONING"
],
[
68,
7,
0,
35,
1,
"CONDITIONING"
],
[
69,
32,
0,
35,
2,
"VAE"
],
[
73,
35,
2,
3,
3,
"LATENT"
],
[
78,
39,
0,
3,
0,
"MODEL"
],
[
82,
17,
0,
42,
0,
"IMAGE"
],
[
85,
8,
0,
44,
0,
"IMAGE"
],
[
86,
42,
0,
45,
0,
"IMAGE"
],
[
87,
45,
0,
43,
0,
"IMAGE"
],
[
88,
45,
0,
35,
3,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.8140274938684042,
"offset": [
1795.999020278545,
750.1636967541119
]
},
"node_versions": {
"comfy-core": "0.3.24"
}
},
"version": 0.4
}
\ No newline at end of file
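The file above is a standard ComfyUI graph export: each node lists typed input/output slots, and each entry in "links" is a 6-tuple [link_id, source_node, source_slot, dest_node, dest_slot, type]. Below is a minimal sketch for sanity-checking such an export before loading it; the local filename flux.1-depth-dev.json is a hypothetical assumption, not taken from this diff.

import json

# Load the workflow export (hypothetical local path).
with open("flux.1-depth-dev.json") as f:
    workflow = json.load(f)

nodes = {node["id"]: node for node in workflow["nodes"]}

# Each link is [link_id, src_node, src_slot, dst_node, dst_slot, type].
for link_id, src, src_slot, dst, dst_slot, link_type in workflow["links"]:
    print(f"link {link_id}: {nodes[src]['type']}[{src_slot}] -> {nodes[dst]['type']}[{dst_slot}] ({link_type})")

# Every wired input should reference a link that exists in the links table.
link_ids = {link[0] for link in workflow["links"]}
for node in workflow["nodes"]:
    for inp in node.get("inputs", []):
        if inp["link"] is not None and inp["link"] not in link_ids:
            raise ValueError(f"dangling link {inp['link']} on node {node['id']}")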
{
"last_node_id": 58,
"last_link_id": 108,
"nodes": [
{
"id": 8,
"type": "VAEDecode",
"pos": [
1620,
98
],
"size": [
210,
46
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "samples",
"localized_name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"localized_name": "vae",
"type": "VAE",
"link": 60
}
],
"outputs": [
{
"name": "IMAGE",
"localized_name": "IMAGE",
"type": "IMAGE",
"links": [
95
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 38,
"type": "InpaintModelConditioning",
"pos": [
952,
78
],
"size": [
302.4000244140625,
138
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "positive",
"localized_name": "positive",
"type": "CONDITIONING",
"link": 80
},
{
"name": "negative",
"localized_name": "negative",
"type": "CONDITIONING",
"link": 81
},
{
"name": "vae",
"localized_name": "vae",
"type": "VAE",
"link": 82
},
{
"name": "pixels",
"localized_name": "pixels",
"type": "IMAGE",
"link": 107
},
{
"name": "mask",
"localized_name": "mask",
"type": "MASK",
"link": 108
}
],
"outputs": [
{
"name": "positive",
"localized_name": "positive",
"type": "CONDITIONING",
"links": [
77
],
"slot_index": 0
},
{
"name": "negative",
"localized_name": "negative",
"type": "CONDITIONING",
"links": [
78
],
"slot_index": 1
},
{
"name": "latent",
"localized_name": "latent",
"type": "LATENT",
"links": [
88
],
"slot_index": 2
}
],
"properties": {
"Node name for S&R": "InpaintModelConditioning"
},
"widgets_values": [
false
]
},
{
"id": 3,
"type": "KSampler",
"pos": [
1280,
100
],
"size": [
315,
262
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "model",
"localized_name": "model",
"type": "MODEL",
"link": 102
},
{
"name": "positive",
"localized_name": "positive",
"type": "CONDITIONING",
"link": 77
},
{
"name": "negative",
"localized_name": "negative",
"type": "CONDITIONING",
"link": 78
},
{
"name": "latent_image",
"localized_name": "latent_image",
"type": "LATENT",
"link": 88
}
],
"outputs": [
{
"name": "LATENT",
"localized_name": "LATENT",
"type": "LATENT",
"links": [
7
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KSampler"
},
"widgets_values": [
482487939694684,
"randomize",
20,
1,
"euler",
"normal",
1
]
},
{
"id": 26,
"type": "FluxGuidance",
"pos": [
596,
48
],
"size": [
317.4000244140625,
58
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"localized_name": "conditioning",
"type": "CONDITIONING",
"link": 41
}
],
"outputs": [
{
"name": "CONDITIONING",
"localized_name": "CONDITIONING",
"type": "CONDITIONING",
"shape": 3,
"links": [
80
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "FluxGuidance"
},
"widgets_values": [
30
]
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
165,
267
],
"size": [
425.27801513671875,
180.6060791015625
],
"flags": {
"collapsed": true
},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "clip",
"localized_name": "clip",
"type": "CLIP",
"link": 63
}
],
"outputs": [
{
"name": "CONDITIONING",
"localized_name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
81
],
"slot_index": 0
}
],
"title": "CLIP Text Encode (Negative Prompt)",
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 34,
"type": "DualCLIPLoader",
"pos": [
-237,
76
],
"size": [
315,
122
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"localized_name": "CLIP",
"type": "CLIP",
"links": [
62,
63
]
}
],
"properties": {
"Node name for S&R": "DualCLIPLoader"
},
"widgets_values": [
"clip_l.safetensors",
"t5xxl_fp16.safetensors",
"flux",
"default"
]
},
{
"id": 58,
"type": "ImageAndMaskResizeNode",
"pos": [
536.786865234375,
328.54388427734375
],
"size": [
315,
174
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "image",
"localized_name": "image",
"type": "IMAGE",
"link": 105
},
{
"name": "mask",
"localized_name": "mask",
"type": "MASK",
"link": 106
}
],
"outputs": [
{
"name": "image",
"localized_name": "image",
"type": "IMAGE",
"links": [
107
],
"slot_index": 0
},
{
"name": "mask",
"localized_name": "mask",
"type": "MASK",
"links": [
108
],
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "ImageAndMaskResizeNode"
},
"widgets_values": [
1024,
1024,
"nearest-exact",
"center",
10
]
},
{
"id": 23,
"type": "CLIPTextEncode",
"pos": [
144,
-7
],
"size": [
422.84503173828125,
164.31304931640625
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "clip",
"localized_name": "clip",
"type": "CLIP",
"link": 62
}
],
"outputs": [
{
"name": "CONDITIONING",
"localized_name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
41
],
"slot_index": 0
}
],
"title": "CLIP Text Encode (Positive Prompt)",
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"A wooden basket of a cat."
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 48,
"type": "Note",
"pos": [
210.1423797607422,
572.7574462890625
],
"size": [
266.4635925292969,
132.3040771484375
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [],
"properties": {
"text": ""
},
"widgets_values": [
"To add mask for fill inpainting, right click on the uploaded image and select \"Open in MaskEditor\". Use the brush tool to add masking and click save to continue."
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 45,
"type": "SVDQuantFluxDiTLoader",
"pos": [
936.3029174804688,
-113.06819915771484
],
"size": [
315,
106
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"localized_name": "MODEL",
"type": "MODEL",
"links": [
102
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "SVDQuantFluxDiTLoader"
},
"widgets_values": [
"mit-han-lab/svdq-int4-flux.1-fill-dev",
"disable",
0
]
},
{
"id": 17,
"type": "LoadImage",
"pos": [
-225.73123168945312,
316.9361267089844
],
"size": [
423.5578308105469,
437.250732421875
],
"flags": {
"collapsed": false
},
"order": 4,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"localized_name": "IMAGE",
"type": "IMAGE",
"shape": 3,
"links": [
105
],
"slot_index": 0
},
{
"name": "MASK",
"localized_name": "MASK",
"type": "MASK",
"shape": 3,
"links": [
106
],
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"clipspace/clipspace-mask-331829.799999997.png [input]",
"image"
]
},
{
"id": 32,
"type": "VAELoader",
"pos": [
953.8762817382812,
440.3467102050781
],
"size": [
315,
58
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"localized_name": "VAE",
"type": "VAE",
"links": [
60,
82
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"ae.safetensors"
]
},
{
"id": 9,
"type": "SaveImage",
"pos": [
1862.43359375,
96.36107635498047
],
"size": [
828.9535522460938,
893.8475341796875
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "images",
"localized_name": "images",
"type": "IMAGE",
"link": 95
}
],
"outputs": [],
"properties": {
"Node name for S&R": "SaveImage"
},
"widgets_values": [
"ComfyUI"
]
}
],
"links": [
[
7,
3,
0,
8,
0,
"LATENT"
],
[
41,
23,
0,
26,
0,
"CONDITIONING"
],
[
60,
32,
0,
8,
1,
"VAE"
],
[
62,
34,
0,
23,
0,
"CLIP"
],
[
63,
34,
0,
7,
0,
"CLIP"
],
[
77,
38,
0,
3,
1,
"CONDITIONING"
],
[
78,
38,
1,
3,
2,
"CONDITIONING"
],
[
80,
26,
0,
38,
0,
"CONDITIONING"
],
[
81,
7,
0,
38,
1,
"CONDITIONING"
],
[
82,
32,
0,
38,
2,
"VAE"
],
[
88,
38,
2,
3,
3,
"LATENT"
],
[
95,
8,
0,
9,
0,
"IMAGE"
],
[
102,
45,
0,
3,
0,
"MODEL"
],
[
105,
17,
0,
58,
0,
"IMAGE"
],
[
106,
17,
1,
58,
1,
"MASK"
],
[
107,
58,
0,
38,
3,
"IMAGE"
],
[
108,
58,
1,
38,
4,
"MASK"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1.7985878990921451,
"offset": [
-287.8887097712823,
208.1745856210748
]
},
"node_versions": {
"comfy-core": "0.3.24",
"comfyui-inpainteasy": "1.0.2"
}
},
"version": 0.4
}
\ No newline at end of file
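For reference, the fill/inpaint workflow above can also be driven from diffusers instead of ComfyUI. This is a hedged sketch, not part of this diff: FluxFillPipeline and the example image/mask URLs follow the FLUX.1-Fill-dev model card, and the prompt and guidance value are taken from the workflow's CLIPTextEncode and FluxGuidance nodes.

import torch
from diffusers import FluxFillPipeline
from diffusers.utils import load_image
from nunchaku import NunchakuFluxTransformer2dModel

# The quantized fill transformer used by the workflow above.
transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-fill-dev")
pipe = FluxFillPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Fill-dev", transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")

# Example image/mask pair from the FLUX.1-Fill-dev model card (assumed URLs).
image = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup.png")
mask = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup_mask.png")

result = pipe(
    prompt="A wooden basket of a cat.",  # same prompt as the workflow's positive CLIPTextEncode node
    image=image,
    mask_image=mask,
    height=1024,
    width=1024,
    guidance_scale=30,  # matches the FluxGuidance value in the workflow
    num_inference_steps=50,
).images[0]
result.save("flux.1-fill-dev.png")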
import torch
from controlnet_aux import CannyDetector
from diffusers import FluxControlPipeline
from diffusers.utils import load_image
from nunchaku import NunchakuFluxTransformer2dModel
from nunchaku.utils import get_precision
precision = get_precision()  # auto-detects whether your precision is 'int4' or 'fp4' based on your GPU
transformer = NunchakuFluxTransformer2dModel.from_pretrained(f"mit-han-lab/svdq-{precision}-flux.1-dev")
pipe = FluxControlPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")
### LoRA Related Code ###
transformer.update_lora_params(
"black-forest-labs/FLUX.1-Canny-dev-lora/flux1-canny-dev-lora.safetensors"
)  # path to your LoRA safetensors; it can also be a remote HuggingFace path
transformer.set_lora_strength(0.85) # Your LoRA strength here
### End of LoRA Related Code ###
prompt = (
"A robot made of exotic candies and chocolates of different kinds. "
"The background is filled with confetti and celebratory gifts."
)
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
processor = CannyDetector()
control_image = processor(
control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024
)
image = pipe(
prompt=prompt, control_image=control_image, height=1024, width=1024, num_inference_steps=50, guidance_scale=30.0
).images[0]
image.save(f"flux.1-canny-dev-lora-{precision}.png")
@@ -4,13 +4,18 @@ from diffusers import FluxControlPipeline
 from diffusers.utils import load_image
 from nunchaku import NunchakuFluxTransformer2dModel
+from nunchaku.utils import get_precision
-transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-canny-dev")
+precision = get_precision()  # auto-detects whether your precision is 'int4' or 'fp4' based on your GPU
+transformer = NunchakuFluxTransformer2dModel.from_pretrained(f"mit-han-lab/svdq-{precision}-flux.1-canny-dev")
 pipe = FluxControlPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-Canny-dev", transformer=transformer, torch_dtype=torch.bfloat16
 ).to("cuda")
-prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
+prompt = (
+    "A robot made of exotic candies and chocolates of different kinds. "
+    "The background is filled with confetti and celebratory gifts."
+)
 control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
 processor = CannyDetector()
@@ -21,4 +26,4 @@ control_image = processor(
 image = pipe(
     prompt=prompt, control_image=control_image, height=1024, width=1024, num_inference_steps=50, guidance_scale=30.0
 ).images[0]
-image.save("flux.1-canny-dev.png")
+image.save(f"flux.1-canny-dev-{precision}.png")
import torch
from diffusers import FluxControlPipeline
from diffusers.utils import load_image
from image_gen_aux import DepthPreprocessor
from nunchaku import NunchakuFluxTransformer2dModel
from nunchaku.utils import get_precision
precision = get_precision()  # auto-detects whether your precision is 'int4' or 'fp4' based on your GPU
transformer = NunchakuFluxTransformer2dModel.from_pretrained(f"mit-han-lab/svdq-{precision}-flux.1-dev")
pipe = FluxControlPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")
### LoRA Related Code ###
transformer.update_lora_params(
"black-forest-labs/FLUX.1-Depth-dev-lora/flux1-depth-dev-lora.safetensors"
)  # path to your LoRA safetensors; it can also be a remote HuggingFace path
transformer.set_lora_strength(0.85) # Your LoRA strength here
### End of LoRA Related Code ###
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(control_image)[0].convert("RGB")
image = pipe(
prompt="A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts.", # noqa: E501
control_image=control_image,
height=1024,
width=1024,
num_inference_steps=30,
guidance_scale=10.0,
generator=torch.Generator().manual_seed(42),
).images[0]
image.save(f"flux.1-depth-dev-lora-{precision}.png")
@@ -4,8 +4,10 @@ from diffusers.utils import load_image
 from image_gen_aux import DepthPreprocessor
 from nunchaku import NunchakuFluxTransformer2dModel
+from nunchaku.utils import get_precision
-transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-depth-dev")
+precision = get_precision()  # auto-detects whether your precision is 'int4' or 'fp4' based on your GPU
+transformer = NunchakuFluxTransformer2dModel.from_pretrained(f"mit-han-lab/svdq-{precision}-flux.1-depth-dev")
 pipe = FluxControlPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-Depth-dev",
@@ -13,7 +15,10 @@ pipe = FluxControlPipeline.from_pretrained(
     torch_dtype=torch.bfloat16,
 ).to("cuda")
-prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
+prompt = (
+    "A robot made of exotic candies and chocolates of different kinds. "
+    "The background is filled with confetti and celebratory gifts."
+)
 control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
 processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
@@ -22,4 +27,4 @@ control_image = processor(control_image)[0].convert("RGB")
 image = pipe(
     prompt=prompt, control_image=control_image, height=1024, width=1024, num_inference_steps=30, guidance_scale=10.0
 ).images[0]
-image.save("flux.1-depth-dev.png")
+image.save(f"flux.1-depth-dev-{precision}.png")
import torch
from diffusers import FluxPipeline
from nunchaku import NunchakuFluxTransformer2dModel
from nunchaku.caching.diffusers_adapters import apply_cache_on_pipe
from nunchaku.utils import get_precision
precision = get_precision()  # auto-detects whether your precision is 'int4' or 'fp4' based on your GPU
transformer = NunchakuFluxTransformer2dModel.from_pretrained(f"mit-han-lab/svdq-{precision}-flux.1-dev")
pipeline = FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")
apply_cache_on_pipe(
pipeline, residual_diff_threshold=0.12
) # Set the first-block cache threshold. Increasing the value enhances speed at the cost of quality.
image = pipeline(["A cat holding a sign that says hello world"], num_inference_steps=50).images[0]
image.save(f"flux.1-dev-cache-{precision}.png")
import torch
from diffusers import FluxControlNetModel, FluxControlNetPipeline
from diffusers.models import FluxMultiControlNetModel
from diffusers.utils import load_image
from nunchaku import NunchakuFluxTransformer2dModel
from nunchaku.caching.diffusers_adapters.flux import apply_cache_on_pipe
from nunchaku.utils import get_precision
base_model = "black-forest-labs/FLUX.1-dev"
controlnet_model_union = "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro"
controlnet_union = FluxControlNetModel.from_pretrained(controlnet_model_union, torch_dtype=torch.bfloat16)
controlnet = FluxMultiControlNetModel([controlnet_union]) # we always recommend loading via FluxMultiControlNetModel
precision = get_precision()
transformer = NunchakuFluxTransformer2dModel.from_pretrained(
f"mit-han-lab/svdq-{precision}-flux.1-dev", torch_dtype=torch.bfloat16
)
transformer.set_attention_impl("nunchaku-fp16")
pipeline = FluxControlNetPipeline.from_pretrained(
base_model, transformer=transformer, controlnet=controlnet, torch_dtype=torch.bfloat16
).to("cuda")
# apply_cache_on_pipe(
# pipeline, residual_diff_threshold=0.1
# ) # Uncomment this line to enable first-block cache to speedup generation
prompt = "A anime style girl with messy beach waves."
control_image_depth = load_image(
"https://huggingface.co/Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro/resolve/main/assets/depth.jpg"
)
control_mode_depth = 2
control_image_canny = load_image(
"https://huggingface.co/Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro/resolve/main/assets/canny.jpg"
)
control_mode_canny = 0
width, height = control_image_depth.size
image = pipeline(
prompt,
control_image=[control_image_depth, control_image_canny],
control_mode=[control_mode_depth, control_mode_canny],
width=width,
height=height,
controlnet_conditioning_scale=[0.3, 0.1],
num_inference_steps=28,
guidance_scale=3.5,
generator=torch.manual_seed(233),
).images[0]
image.save(f"flux.1-dev-controlnet-union-pro-{precision}.png")
@@ -2,10 +2,13 @@ import torch
 from diffusers import FluxPipeline
 from nunchaku import NunchakuFluxTransformer2dModel
+from nunchaku.utils import get_precision
-transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-fp4-flux.1-dev", precision="fp4")
+precision = get_precision()  # auto-detects whether your precision is 'int4' or 'fp4' based on your GPU
+transformer = NunchakuFluxTransformer2dModel.from_pretrained(f"mit-han-lab/svdq-{precision}-flux.1-dev")
 transformer.set_attention_impl("nunchaku-fp16")  # set the attention implementation to fp16
 pipeline = FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16
 ).to("cuda")
-image = pipeline("A cat holding a sign that says hello world", num_inference_steps=50, guidance_scale=3.5).images[0]
-image.save("flux.1-dev.png")
+image = pipeline(["A cat holding a sign that says hello world"], num_inference_steps=50).images[0]
+image.save(f"flux.1-dev-{precision}.png")