telechat_infer_demo.py

import os
import torch
import argparse
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

os.environ["CUDA_VISIBLE_DEVICES"] = '0'
parser = argparse.ArgumentParser(description='Load model with specified path.')
parser.add_argument('--path', type=str, required=True, help='Path to the model directory.')
args = parser.parse_args()
#PATH = '../models/12B'
PATH = args.path


def main():
    # 加载模型相关
    tokenizer = AutoTokenizer.from_pretrained(PATH,trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(PATH, trust_remote_code=True, device_map="auto",
                                                 torch_dtype=torch.float16)
    generate_config = GenerationConfig.from_pretrained(PATH)
    model.eval()

    #  chat(bot)模型多轮演示
    print("*" * 10 + "多轮输入演示" + "*" * 10)
    question = "你是谁？"
    print("提问:", question)
    answer, history = model.chat(tokenizer = tokenizer, question=question, history=[], generation_config=generate_config,
                                 stream=False)
    print("回答:", answer)
    print("截至目前的聊天记录是:", history)

    question = "你是谁训练的"
    print("提问:", question)
    # 将history传入
    answer, history = model.chat(tokenizer, question=question, history=history, generation_config=generate_config,
                                 stream=False)
    print("回答是:", answer)
    print("截至目前的聊天记录是:", history)

    # 也可以这么调用传入history
    history = [
        {"role": "user", "content": "你是谁"},
        {"role": "bot", "content": "我是telechat"},
    ]

    question = "你是谁训练的"
    print("提问:", question)
    answer, history = model.chat(tokenizer, question=question, history=history, generation_config=generate_config,
                                 stream=False)
    print("回答是:", answer)
    print("截至目前的聊天记录是:", history)

    # chat(bot)模型 流式返回演示
    print("*" * 10 + "流式输入演示" + "*" * 10)
    question = "你是谁？"
    print("提问:", question)
    gen = model.chat(tokenizer, question=question, history=[], generation_config=generate_config,
                     stream=True)
    for answer, history in gen:
        print("回答是:", answer)
        print("截至目前的聊天记录是:", history)


if __name__ == '__main__':
    main()