# Changed from https://github.com/GaParmar/img2img-turbo/blob/main/gradio_sketch2image.py
import os
import random
import time
from datetime import datetime

import torch
from diffusers import FluxKontextPipeline
from PIL import Image

from utils import get_args
from vars import EXAMPLES, MAX_SEED

from nunchaku.models.transformers.transformer_flux import NunchakuFluxTransformer2dModel

# import gradio last to avoid conflicts with other imports
import gradio as gr  # noqa: isort: skip

args = get_args()

if args.precision == "bf16":
    # Full-precision (BF16) pipeline
    pipeline = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16)
    pipeline = pipeline.to("cuda")
    pipeline.precision = "bf16"
else:
    assert args.precision == "int4"
    pipeline_init_kwargs = {}
    # 4-bit SVDQuant transformer from Nunchaku
    transformer = NunchakuFluxTransformer2dModel.from_pretrained(
        "mit-han-lab/nunchaku-flux.1-kontext-dev/svdq-int4_r32-flux.1-kontext-dev.safetensors"
    )
    pipeline_init_kwargs["transformer"] = transformer
    if args.use_qencoder:
        # Optionally swap in the 4-bit AWQ-quantized T5 text encoder
        from nunchaku.models.text_encoders.t5_encoder import NunchakuT5EncoderModel

        text_encoder_2 = NunchakuT5EncoderModel.from_pretrained(
            "mit-han-lab/nunchaku-t5/awq-int4-flux.1-t5xxl.safetensors"
        )
        pipeline_init_kwargs["text_encoder_2"] = text_encoder_2
    pipeline = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16, **pipeline_init_kwargs
    )
    pipeline = pipeline.to("cuda")
    pipeline.precision = "int4"


def run(image, prompt: str, num_inference_steps: int, guidance_scale: float, seed: int) -> tuple[Image.Image, str]:
    img = image["composite"].convert("RGB")
    start_time = time.time()
    result_image = pipeline(
        prompt=prompt,
        image=img,
        height=img.height,
        width=img.width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=torch.Generator().manual_seed(seed),
    ).images[0]
    latency = time.time() - start_time
    # Report sub-second latencies in milliseconds
    if latency < 1:
        latency = latency * 1000
        latency_str = f"{latency:.2f}ms"
    else:
        latency_str = f"{latency:.2f}s"
    torch.cuda.empty_cache()
    # Persist a simple per-model usage counter and append to a usage log
    if args.count_use:
        if os.path.exists(f"{args.model}-use_count.txt"):
            with open(f"{args.model}-use_count.txt", "r") as f:
                count = int(f.read())
        else:
            count = 0
        count += 1
        current_time = datetime.now()
        print(f"{current_time}: {count}")
        with open(f"{args.model}-use_count.txt", "w") as f:
            f.write(str(count))
        with open(f"{args.model}-use_record.txt", "a") as f:
            f.write(f"{current_time}: {count}\n")
    return result_image, latency_str


with gr.Blocks(css_paths="assets/style.css", title="Nunchaku FLUX.1-Kontext Demo") as demo:
    with open("assets/description.html", "r") as f:
        DESCRIPTION = f.read()
    # Get the GPU properties
    if torch.cuda.device_count() > 0:
        gpu_properties = torch.cuda.get_device_properties(0)
        gpu_memory = gpu_properties.total_memory / (1024**3)  # Convert to GiB
        gpu_name = torch.cuda.get_device_name(0)
        device_info = f"Running on {gpu_name} with {gpu_memory:.0f} GiB memory."
    else:
        device_info = "Running on CPU 🥶 This demo does not work on CPU."
    notice = 'Notice: We will replace unsafe prompts with a default prompt: "A peaceful world."'

    def get_header_str():
        if args.count_use:
            if os.path.exists("use_count.txt"):
                with open("use_count.txt", "r") as f:
                    count = int(f.read())
            else:
                count = 0
            count_info = (
                f"