# Minimal single-turn chat inference example for a locally stored causal LM
# checkpoint: load tokenizer and model, build one Human/Assistant prompt,
# generate a short completion and print the decoded text.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Local checkpoint directory. trust_remote_code=True below executes Python
# code shipped inside this directory, so only point it at a trusted checkpoint.
model_id = "/workspace/JIUTIAN-139MoE-chat"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # let the loader decide where the weights are placed
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Wrap the user question in the "Human:/Assistant:" turn template.
text = "Please introduce the Great Wall."
text = "Human:\n" + text + "\n\nAssistant:\n"

# add_special_tokens=False: the prompt template above is passed through as-is.
# NOTE(review): padding_side/truncation_side have no visible effect on a
# single, unpadded, untruncated prompt; whether the tokenizer call honours
# them depends on the installed transformers version - confirm.
inputs = tokenizer(
    text,
    return_tensors="pt",
    add_special_tokens=False,
    padding_side='left',
    truncation_side='left',
)
# The tokenizer returns CPU tensors; with device_map="auto" the model may live
# on an accelerator, so move the inputs to the model's device before
# generating to avoid a device-mismatch warning or error.
inputs = inputs.to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    repetition_penalty=1.03,
    do_sample=False,  # sampling disabled
    eos_token_id=0,  # presumably this model's end-of-text id - TODO confirm
)

# Decode the first returned sequence, dropping special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))