import torch
from modelscope import AutoModelForCausalLM, AutoTokenizer
from decoder import decode_vq_tokens

model_path = "inclusionAI/LLaDA2.0-Uni"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="cuda", torch_dtype="bfloat16", trust_remote_code=True
).eval()
model.tokenizer = tokenizer

# Generate image tokens with thinking process
result = model.generate_image(
    "A fox with thick, dense, fluffy fur in a winter setting, possibly surrounded by snow.",
    image_h=1024, image_w=1024,
    mode="thinking",
    steps=8, cfg_scale=2.0,
    thinking_steps=32, thinking_gen_length=4096,
)

# Print thinking trace
print("Thinking:", result["thinking"])

# Decode to PIL image
image = decode_vq_tokens(result["token_ids"], result["h"], result["w"], model_path, "cuda", num_steps=8, decode_mode="decoder-turbo",)
image.save("output_thinking.png")