"""Minimal chat demo for a local InternLM2-Math-7B checkpoint.

Loads the tokenizer and model from ./internlm2-math-7b, moves the model to
the GPU in float16, and asks it a single question ("1+1=").
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(
    "./internlm2-math-7b",
    trust_remote_code=True,
)

# torch_dtype=torch.float16 halves the memory footprint versus the float32
# default, which would likely OOM a single GPU with a 7B-parameter model.
model = AutoModelForCausalLM.from_pretrained(
    "./internlm2-math-7b",
    trust_remote_code=True,
    torch_dtype=torch.float16,
).cuda()
model = model.eval()  # inference mode: disables dropout etc.

# history=[] starts a fresh dialogue; meta_instruction="" suppresses the
# model's default system prompt.
response, history = model.chat(
    tokenizer,
    "1+1=",
    history=[],
    meta_instruction="",
)
print(response)