from transformers import AutoModel, AutoProcessor
import torch

# Run on GPU when available; the model also works on CPU, just more slowly.
device = "cuda" if torch.cuda.is_available() else "cpu"
repo_id = "ZhipuAI/GLM-ASR-Nano-2512"

# The processor bundles the audio feature extractor and the tokenizer;
# the model weights are loaded in bfloat16 to halve memory use.
processor = AutoProcessor.from_pretrained(repo_id)
model = AutoModel.from_pretrained(repo_id, dtype=torch.bfloat16, device_map=device)

# Build a chat-style request: one user turn containing the audio clip
# followed by a text instruction.
audio_url = "examples/example_zh.wav"
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "audio",
                "url": audio_url,
            },
            {"type": "text", "text": "Please transcribe this audio into text"},
        ],
    }
]

# Tokenize the conversation and extract audio features in one call.
inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
)
inputs = inputs.to(device, dtype=torch.bfloat16)

# Greedy decoding; slice off the prompt tokens so only the newly
# generated transcription is decoded.
outputs = model.generate(**inputs, max_new_tokens=128, do_sample=False)
print(processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True))
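
# batch_decode returns a list with one string per sequence in the batch, so for
# a single clip you can index the first element to get the plain transcription.
# A minimal sketch, reusing the tensors from above:
transcription = processor.batch_decode(
    outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True
)[0]
print(transcription)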