"""Run inference with a LoRA-finetuned ChatGLM2-6B model.

Loads the base ChatGLM2-6B model, attaches the trained LoRA adapter
weights on top of it, and generates one response for a fixed ad-copy
style prompt.
"""
import os

# Pin the process to GPU 1. This must happen before any CUDA context is
# created, so keep it ahead of the torch / transformers imports below.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import torch
from peft import PeftModel
from transformers import AutoModel, AutoTokenizer

# Path to the original (base) pretrained model.
model_name_or_path = "/chatglm/ChatGLM2-6B-main/ChatGLM2-6B-main/zero_nlp-main/pretrained_model"
# Path where the trained LoRA adapter checkpoint was saved.
peft_model_id = "output-chatglm1/adgen-chatglm2-6b-lora_version/checkpoint-2"


def main():
    """Load tokenizer and base model, apply LoRA weights, run one chat turn."""
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path, trust_remote_code=True
    )
    # float16 + device_map='auto' lets accelerate place the layers on the
    # visible GPU (same intent as the previously commented-out .half().cuda()).
    model = AutoModel.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
        device_map='auto',
        torch_dtype=torch.float16,
    )
    # Wrap the base model with the trained LoRA adapter weights.
    model = PeftModel.from_pretrained(model, peft_model_id)
    model = model.eval()

    response, history = model.chat(
        tokenizer,
        "类型#上衣*材质#牛仔布*颜色#白色*风格#简约*图案#刺绣*衣样式#外套*衣款式#破洞",
        history=[],
    )
    print(response)


if __name__ == "__main__":
    main()