# single_dcu.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Replace "./internlm2-math-7b" with your model path.
tokenizer = AutoTokenizer.from_pretrained("./internlm2-math-7b", trust_remote_code=True)
# Set `torch_dtype=torch.float16` to load the model in float16; otherwise it is
# loaded in float32, which may cause an out-of-memory (OOM) error.
model = AutoModelForCausalLM.from_pretrained("./internlm2-math-7b", trust_remote_code=True, torch_dtype=torch.float16).cuda()
model = model.eval()
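# `chat` below is a convenience method supplied by the model's remote code
# (enabled via trust_remote_code); it tokenizes the prompt, runs generation,
# and returns the reply together with the updated conversation history.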
response, history = model.chat(tokenizer, "1+1=", history=[], meta_instruction="")
print(response)
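# A follow-up turn can pass the returned `history` back in to keep
# conversational context (a minimal sketch; the follow-up question is
# illustrative and the model's actual reply is not guaranteed):
response, history = model.chat(tokenizer, "And 2+2?", history=history, meta_instruction="")
print(response)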