llama_tokenizer.py
import torch

from transformers import LlamaTokenizer

# Load the LLaMA tokenizer from a local copy of the 7B Hugging Face checkpoint.
tokenizer = LlamaTokenizer.from_pretrained('/data/models/llama-7b-hf/')

# './out' contains whitespace-separated token IDs from a prior generation run.
with open('./out', 'r') as file:
    output = file.read()

# Convert the IDs to a tensor and decode them back into text.
output = torch.tensor([int(id_) for id_ in output.split()])
output = tokenizer.batch_decode(output.unsqueeze(0), skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(output)
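
# A minimal sketch (not part of the original script, kept as comments) of how a
# file like './out' could be produced with the same tokenizer: encode a prompt
# and write the IDs as whitespace-separated integers. The prompt text is an
# illustrative assumption.
#
# ids = tokenizer('Hello, world!')['input_ids']
# with open('./out', 'w') as f:
#     f.write(' '.join(str(i) for i in ids))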