"""Minimal example: single-image, single-prompt inference with GLM-4.1V-9B-Thinking via transformers."""

import argparse

import torch
from transformers import AutoProcessor, Glm4vForConditionalGeneration

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        type=str,
        default="THUDM/GLM-4.1V-9B-Thinking",
        help="Path to the model",
    )
    args = parser.parse_args()

    # Example usage: one user turn containing an image and a text prompt
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "url": "../doc/Grayscale_8bits_palette_sample_image.png"},
                {"type": "text", "text": "describe this image"},
            ],
        }
    ]

    # Load model and processor
    processor = AutoProcessor.from_pretrained(args.model_path, use_fast=True)
    model = Glm4vForConditionalGeneration.from_pretrained(
        pretrained_model_name_or_path=args.model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # Process inputs: apply the chat template, tokenize, and move tensors to the model's device
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Generate, then decode only the newly generated tokens (slice off the prompt)
    generated_ids = model.generate(**inputs, max_new_tokens=8192)
    output_text = processor.decode(
        generated_ids[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=False,
    )
    print("output:\n", output_text)
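
# Example invocation (the filename "trans_infer_cli.py" is illustrative, not
# fixed by this script):
#   python trans_infer_cli.py --model_path THUDM/GLM-4.1V-9B-Thinking
# --model_path also accepts a local checkpoint directory, in which case nothing
# is fetched from the Hugging Face Hub.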