aux_metric.py 507 Bytes
Newer Older
1
2
3
4
from textdistance import levenshtein
from transformers import AutoTokenizer

# Change this tokenizer to match the model you are using.
lintangsutawika's avatar
format  
lintangsutawika committed
5
6
# Loaded once at import time; token_edit_distance() reuses this instance.
# Only encoding is needed here, so no generation options (such as
# max_new_tokens) are passed: they are not tokenizer settings.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-2.8b")

7
8

def token_edit_distance(references, predictions, **kwargs):
    """Return the token-level Levenshtein distance for one reference/prediction pair.

    Only the first element of each argument is compared. Both items are
    encoded with the module-level ``tokenizer`` and the edit distance is
    computed over the two resulting token-id sequences, so the result counts
    token edits rather than character edits.

    Args:
        references: Indexable collection; ``references[0]`` is the reference
            text (presumably a ``str`` -- confirm against the caller).
        predictions: Indexable collection; ``predictions[0]`` is the predicted
            text (presumably a ``str`` -- confirm against the caller).
        **kwargs: Accepted for call-site compatibility and ignored.

    Returns:
        The value returned by ``levenshtein.distance`` for the two token-id
        sequences.

    Raises:
        IndexError: If ``references`` or ``predictions`` is an empty sequence.
    """
    ref_tokens = tokenizer.encode(references[0])
    pred_tokens = tokenizer.encode(predictions[0])
    return levenshtein.distance(ref_tokens, pred_tokens)