"vscode:/vscode.git/clone" did not exist on "299217c95ca314e8dbbeca26ce8cdceb440ec53b"
Commit bb455d7c authored by Casper Hansen
Browse files

Add generation example for safetensors

parent 7ba31254
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer, TextStreamer
# Identifier of the quantized checkpoint; passed to both loaders below
quant_path = "casperhansen/opt-125m-awq"

# Load the quantized model (fuse_layers=True, use_safetensors=True) and its tokenizer
model = AutoAWQForCausalLM.from_quantized(
    quant_path,
    fuse_layers=True,
    use_safetensors=True,
)
tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
# The streamer wraps the tokenizer and is handed to generate() further down
streamer = TextStreamer(tokenizer, skip_special_tokens=True)

# Chat-style prompt; {prompt} is filled in with the user's question
prompt_template = """\
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
USER: {prompt}
ASSISTANT:"""

# Fill in the template, tokenize to PyTorch tensors, and move the ids to CUDA
prompt_text = prompt_template.format(prompt="How are you today?")
encoded = tokenizer(prompt_text, return_tensors="pt")
tokens = encoded.input_ids.cuda()

# Generate at most 512 new tokens, passing the streamer along
generation_output = model.generate(tokens, streamer=streamer, max_new_tokens=512)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment