import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "zai-org/GLM-4.5"

messages = [{"role": "user", "content": "hello"}]

# Tokenize the chat messages and append the generation prompt expected by the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
)

# Load the model weights in bfloat16 and shard them across available devices.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Greedy decoding; slice off the prompt tokens so only the new completion is decoded.
inputs = inputs.to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)
output_text = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1]:])
print(output_text)