import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model = "./weights/CodeLlama-7b-Instruct-hf"
max_gen_length = 4096
description = (
    "In Bash, how do I list all text files in the current directory "
    "(excluding subdirectories) that have been modified in the last month?"
)

# Load the tokenizer, capping sequence length at the generation limit.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    model_max_length=max_gen_length,
)

# Wrap the question in the model's chat format.
chat = [
    {"role": "user", "content": description},
]
print(tokenizer.apply_chat_template(chat, tokenize=False))

# Tokenize the formatted prompt and move it to the GPU.
model_input = tokenizer.apply_chat_template(
    chat,
    return_tensors="pt",
    truncation=True,
).to("cuda")

# Load the model in half precision, sharding it across available devices.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
model.eval()

# Generate a completion and decode it back to text.
with torch.no_grad():
    output = model.generate(model_input, max_new_tokens=max_gen_length)
    print(tokenizer.decode(output[0], skip_special_tokens=True))
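
# The original import list also pulled in BitsAndBytesConfig without using it,
# which suggests quantized loading was considered. A minimal sketch of 4-bit
# loading follows; the specific settings (NF4 quantization, float16 compute)
# are assumptions, not values taken from the original script.
#
# from transformers import BitsAndBytesConfig
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
#
# # Pass quantization_config instead of torch_dtype when loading.
# model = AutoModelForCausalLM.from_pretrained(
#     base_model,
#     quantization_config=bnb_config,
#     device_map="auto",
#     low_cpu_mem_usage=True,
# )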