Update run_Transformers_imgunderstand.py

3ad22654 · raojy · f6be6d17 · 3ad22654
Commit 3ad22654 authored May 20, 2026 by raojy 💬
Show whitespace changes
Inline Side-by-side

Showing with 26 additions and 0 deletions

run_Transformers_imgunderstand.py run_Transformers_imgunderstand.py +26 -0

No files found.
--- a/run_Transformers_imgunderstand.py
+++ b/run_Transformers_imgunderstand.py
+import torch
+from modelscope import AutoModelForCausalLM, AutoTokenizer
+from encoder.image_tokenizer import ImageTokenizer
+from decoder.smart_img_process import smart_resize_images
+
+model_path = "inclusionAI/LLaDA2.0-Uni"  # one identifier reused for the tokenizer, the model and the image tokenizer below
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)  # NOTE(review): trust_remote_code=True presumably permits executing code shipped with the model repo; confirm the source is trusted
+model = AutoModelForCausalLM.from_pretrained(
+    model_path, device_map="cuda", torch_dtype="bfloat16", trust_remote_code=True
+).eval()  # the object returned by .eval() is what gets bound to `model`
+model.tokenizer = tokenizer  # attach the tokenizer to the model object; presumably understand_image reads it (TODO confirm)
+
+# Encode image to discrete tokens: preprocess the example image, tokenize it, then shift every id by the model's image_token_offset
+image_tokenizer = ImageTokenizer(model_path=model_path, device="cuda")
+pil_image = smart_resize_images(["./assets/understanding_example.png"])[0]  # helper takes a list of paths; only the first result is used
+info = image_tokenizer.encode_with_info(pil_image)  # result is indexed below by "token_ids" and "grid_thw"
+image_tokens = [x + model.config.image_token_offset for x in info["token_ids"]]
+_, h, w = info["grid_thw"]  # first element is discarded; h and w are forwarded to understand_image
+
+# Understand the image: ask the model a question about the encoded image and print whatever it returns
+response = model.understand_image(
+    image_tokens, h, w,
+    question="Describe this image in detail.",
+    steps=32, gen_length=2048,  # NOTE(review): presumably generation step count and maximum output length; verify against understand_image
+)
+print(response)