"""Basic AWQ generation example: load a pre-quantized model and stream generated text."""
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer, TextStreamer

# Hugging Face Hub path of the pre-quantized (AWQ) checkpoint.
quant_path = "TheBloke/Mistral-7B-OpenOrca-AWQ"

# Load model
# fuse_layers=True enables AWQ's fused kernels for faster inference.
model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
# Stream decoded tokens to stdout as they are generated; hide the prompt
# and special tokens so only the model's reply is printed.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Convert prompt to tokens
# ChatML-style template expected by Mistral-7B-OpenOrca; the leading
# backslash after """ suppresses an initial newline in the literal.
prompt_template = """\
<|im_start|>system
You are MistralOrca, a large language model trained by Alignment Lab AI. Write out your reasoning step-by-step to be sure you get the right answers!<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant"""

# Classic geography riddle used as the demo prompt.
prompt = "You're standing on the surface of the Earth. "\
        "You walk one mile south, one mile west and one mile north. "\
        "You end up exactly where you started. Where are you?"

# Fill the chat template with the user prompt, tokenize it to PyTorch
# tensors, and move the input IDs onto the GPU (model weights live there).
tokens = tokenizer(
    prompt_template.format(prompt=prompt),
    return_tensors='pt'
).input_ids.cuda()

# Generate output
# The streamer prints the reply incrementally; the full token sequence is
# also returned. max_new_tokens bounds the length of the generated answer.
generation_output = model.generate(
    tokens,
    streamer=streamer,
    max_new_tokens=512
)