Commit 2b7d8c2d authored by lintangsutawika
Browse files

add TED to BigBench and Brier score to MMLU

parent 5cc65a79
from textdistance import levenshtein
from transformers import AutoTokenizer
# Module-level tokenizer used by token_edit_distance to turn text into token
# ids; it is created once, when this module is first imported.
# Change this tokenizer to fit with the model you are using.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-2.8b")
def token_edit_distance(references, predictions, **kwargs):
    """Return the token-level Levenshtein distance for one example.

    Only the first item of ``references`` and the first item of
    ``predictions`` are compared. Each is encoded with the module-level
    ``tokenizer`` and the two resulting token-id sequences are passed to
    ``levenshtein.distance``.

    Args:
        references: Indexable collection; element 0 is the reference text.
        predictions: Indexable collection; element 0 is the predicted text.
        **kwargs: Accepted for interface compatibility and ignored.

    Returns:
        The value returned by ``levenshtein.distance`` for the two
        token-id sequences.
    """
    target_text = references[0]
    target_ids = tokenizer.encode(target_text)
    # Arguments are evaluated left to right, so the prediction is encoded
    # after the reference, as before.
    return levenshtein.distance(target_ids, tokenizer.encode(predictions[0]))
group: bigbench group: bigbench
dataset_path: bigbench # will switch to `hails/bigbench` when all tasks are pushed dataset_path: hails/bigbench # will switch to `hails/bigbench` when all tasks are pushed
output_type: generate_until output_type: generate_until
dataset_kwargs: dataset_kwargs:
# num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods
...@@ -14,3 +14,6 @@ metric_list: ...@@ -14,3 +14,6 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
ignore_punctuation: true ignore_punctuation: true
- metric: !function aux_metric.token_edit_distance # pip install textdistance
aggregation: mean
higher_is_better: false
\ No newline at end of file
...@@ -15,3 +15,6 @@ metric_list: ...@@ -15,3 +15,6 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: brier_score
aggregation: mean
higher_is_better: false
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment