Commit 3ad22654 authored by raojy's avatar raojy 💬
Browse files

Update run_Transformers_imgunderstand.py

parent f6be6d17
Pipeline #3643 failed with stages
in 0 seconds
import torch
from modelscope import AutoModelForCausalLM, AutoTokenizer
from encoder.image_tokenizer import ImageTokenizer
from decoder.smart_img_process import smart_resize_images
# Checkpoint identifier reused by the tokenizer, the model and the image tokenizer.
model_path = "inclusionAI/LLaDA2.0-Uni"

# Load the text tokenizer and the model, then switch the model to eval mode.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="cuda",
    torch_dtype="bfloat16",
    trust_remote_code=True,
)
model = model.eval()
# Attach the tokenizer to the model object before calling understand_image.
model.tokenizer = tokenizer

# Resize the example image and encode it into discrete token ids.
image_tokenizer = ImageTokenizer(model_path=model_path, device="cuda")
resized_images = smart_resize_images(["./assets/understanding_example.png"])
pil_image = resized_images[0]
info = image_tokenizer.encode_with_info(pil_image)

# Shift every image token id by the offset stored in the model config.
token_offset = model.config.image_token_offset
image_tokens = [token_id + token_offset for token_id in info["token_ids"]]

# Only the last two entries of grid_thw are used below; the first is discarded.
_, h, w = info["grid_thw"]

# Ask the model to describe the encoded image and print its answer.
response = model.understand_image(
    image_tokens,
    h,
    w,
    question="Describe this image in detail.",
    steps=32,
    gen_length=2048,
)
print(response)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment