"""Example script: edit an image with LLaDA2.0-Uni from a text instruction.

The script loads the tokenizer and model, encodes
``./assets/edit_example.png`` into image tokens, calls ``model.edit_image``
with the instruction, decodes the returned tokens and writes the result to
``edited.png``.
"""
import torch
from modelscope import AutoModelForCausalLM, AutoTokenizer
from PIL import Image

from decoder import decode_vq_tokens
from decoder.utils import generate_crop_size_list, var_center_crop
from encoder.image_tokenizer import ImageTokenizer

model_path = "inclusionAI/LLaDA2.0-Uni"
# Single place to change the device used by the image tokenizer and decoder.
device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype="bfloat16",
    trust_remote_code=True,
).eval()
# The tokenizer is attached to the model object; presumably `edit_image`
# reads it from there -- confirm against the model's remote code.
model.tokenizer = tokenizer

# Encode source image
image_tokenizer = ImageTokenizer(model_path=model_path, device=device)
# (512 // 32) ** 2 == 256 is passed first and 32 second; presumably the
# patch budget and the patch size -- confirm against decoder.utils.
crop_size_list = generate_crop_size_list((512 // 32) ** 2, 32)

# Open the file in a context manager so its handle is closed right away;
# `convert` returns a separate image that stays usable afterwards.
with Image.open("./assets/edit_example.png") as source_image:
    rgb_image = source_image.convert("RGB")
pil_image = var_center_crop(rgb_image, crop_size_list=crop_size_list)

info = image_tokenizer.encode_with_info(pil_image)
# Shift every token id by the model's configured image-token offset.
image_tokens = [x + model.config.image_token_offset for x in info["token_ids"]]
# Only the last two entries of "grid_thw" are used below.
_, h, w = info["grid_thw"]

# Edit the image
result = model.edit_image(
    image_tokens,
    h,
    w,
    instruction="Change the background to a beach.",
    steps=8,
    cfg_text_scale=4.0,
)

# Decode to PIL image
edited_image = decode_vq_tokens(
    result["token_ids"],
    result["h"],
    result["w"],
    model_path,
    device,
    num_steps=8,
    decode_mode="decoder-turbo",
)
edited_image.save("edited.png")