infer_test.py
# Minimal inference smoke test for ChatGLM3-6B using Hugging Face transformers.
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and model from a local checkpoint; trust_remote_code is
# required because ChatGLM3 ships custom modeling code alongside its weights.
tokenizer = AutoTokenizer.from_pretrained("../../chatglm3-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("../../chatglm3-6b", trust_remote_code=True, device='cuda')
model = model.eval()

# First turn: "你好" ("Hello"), starting from an empty conversation history.
response, history = model.chat(tokenizer, "你好", history=[])
print(response)

# Second turn: "晚上睡不着应该怎么办" ("What should I do if I can't sleep at night?"),
# reusing the history returned by the previous turn.
response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history)
print(response)

# Optional: dump the tokenizer vocabulary to a file for inspection.
# print(len(tokenizer))
# vocab_content = tokenizer.get_vocab()
# with open("vocab.txt", "w", encoding="utf-8") as f:
#     for token, index in vocab_content.items():
#         f.write(f"{token} {index}\n")