"""Minimal AWQ text-generation example.

Loads a pre-quantized Mistral-7B-Instruct AWQ checkpoint, tokenizes a
single prompt using the Mistral ``[INST] ... [/INST]`` chat format, and
streams the generated answer to stdout as it is produced.

NOTE(review): requires a CUDA-capable GPU — the input ids are moved to
the GPU with ``.cuda()`` before generation.
"""
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer, TextStreamer

quant_path = "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"

# Load model.
# fuse_layers=True asks AutoAWQ to use its fused kernel implementations
# for faster inference on supported architectures.
model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
# Stream decoded tokens to stdout as they are generated, skipping the
# echoed prompt and any special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Mistral-Instruct chat format: the user turn is wrapped in [INST] ... [/INST].
prompt_template = "[INST] {prompt} [/INST]"

prompt = (
    "You're standing on the surface of the Earth. "
    "You walk one mile south, one mile west and one mile north. "
    "You end up exactly where you started. Where are you?"
)

# Convert prompt to tokens and move them to the GPU.
tokens = tokenizer(
    prompt_template.format(prompt=prompt),
    return_tensors="pt",
).input_ids.cuda()

# Generate output (the streamer prints it incrementally; the full token
# sequence is also returned here).
generation_output = model.generate(
    tokens,
    streamer=streamer,
    max_new_tokens=512,
)