""" This script creates a Gradio demo with a Transformers backend for the glm-4v-9b model, allowing users to interact with the model through a Gradio web UI. Usage: - Run the script to start the Gradio server. - Interact with the model via the web UI. Requirements: - Gradio package - Type `pip install gradio==4.44.1` to install Gradio. """ import os from io import BytesIO from threading import Thread import gradio as gr import requests import torch from PIL import Image from transformers import AutoModel, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer MODEL_PATH = os.environ.get("MODEL_PATH", "THUDM/glm-4v-9b") tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True, encode_special_tokens=True) model = AutoModel.from_pretrained( MODEL_PATH, trust_remote_code=True, device_map="auto", torch_dtype=torch.bfloat16 ).eval() class StopOnTokens(StoppingCriteria): def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: stop_ids = model.config.eos_token_id for stop_id in stop_ids: if input_ids[0][-1] == stop_id: return True return False def get_image(image_path=None, image_url=None): if image_path: return Image.open(image_path).convert("RGB") elif image_url: response = requests.get(image_url) return Image.open(BytesIO(response.content)).convert("RGB") return None def chatbot(image_path=None, image_url=None, assistant_prompt=""): image = get_image(image_path, image_url) messages = [{"role": "assistant", "content": assistant_prompt}, {"role": "user", "content": "", "image": image}] model_inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_tensors="pt", return_dict=True ).to(next(model.parameters()).device) streamer = TextIteratorStreamer(tokenizer=tokenizer, timeout=60, skip_prompt=True, skip_special_tokens=True) generate_kwargs = { **model_inputs, "streamer": streamer, "max_new_tokens": 1024, "do_sample": True, "top_p": 0.8, "temperature": 0.6, "stopping_criteria": StoppingCriteriaList([StopOnTokens()]), "repetition_penalty": 1.2, "eos_token_id": [151329, 151336, 151338], } t = Thread(target=model.generate, kwargs=generate_kwargs) t.start() response = "" for new_token in streamer: if new_token: response += new_token return image, response.strip() with gr.Blocks() as demo: demo.title = "GLM-4V-9B Image Recognition Demo" demo.description = """ This demo uses the GLM-4V-9B model to got image infomation. """ with gr.Row(): with gr.Column(): image_path_input = gr.File(label="Upload Image (High-Priority)", type="filepath") image_url_input = gr.Textbox(label="Image URL (Low-Priority)") assistant_prompt_input = gr.Textbox(label="Assistant Prompt (You Can Change It)", value="这是什么?") submit_button = gr.Button("Submit") with gr.Column(): chatbot_output = gr.Textbox(label="GLM-4V-9B Model Response") image_output = gr.Image(label="Image Preview") submit_button.click( chatbot, inputs=[image_path_input, image_url_input, assistant_prompt_input], outputs=[image_output, chatbot_output], ) demo.launch(server_name="127.0.0.1", server_port=8911, inbrowser=True, share=False)