text_completion.py 969 Bytes
Newer Older
Rayyyyy's avatar
Rayyyyy committed
1
2
3
"""Minimal text-completion demo for DeepSeek-V2-Lite.

Loads the model in bfloat16 on a single CUDA device, generates up to 100 new
tokens continuing a fixed prompt, and prints the decoded result.  Requires a
GPU with enough memory for the full model (no sharding: `device_map="auto"` is
not supported for this model, see the note below).
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Hugging Face Hub ID of the model checkpoint to load.
model_name_or_path = "deepseek-ai/DeepSeek-V2-Lite"

# `trust_remote_code=True` is required: DeepSeek-V2 ships custom modeling code.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)

# `device_map` cannot be set to `auto`
model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16).cuda()

model.generation_config = GenerationConfig.from_pretrained(model_name_or_path)
# Use EOS as the pad token so generate() has a valid pad_token_id
# (the checkpoint's generation config does not define one).
model.generation_config.pad_token_id = model.generation_config.eos_token_id

text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"

# Move the tokenized prompt onto the model's device before generating.
inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs.to(model.device), max_new_tokens=100)
result = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("result", result)