Unverified commit ac0bc1df, authored by Baber Abbasi and committed by GitHub
Browse files

leaderboard - add subtask scores (#2867)

* add subtask scores

* pacify pre-commit
parent 6cc41d34
from lm_eval.api.filter import Filter from lm_eval.api.filter import Filter
from lm_eval.api.registry import register_filter from lm_eval.api.registry import register_filter
# Answer letters offered to the model, in option order.
alpha = ["A", "B", "C"]

# Maps each gold label string (as it appears in the dataset's answer message)
# to its index into `alpha`.
out_dic = {"ايجابي": 1, "سلبي": 0, "ماكينش إحساس": 2}
def doc_to_text(doc):
    """Build the multiple-choice prompt from the first message of *doc*.

    The dash-prefixed option labels in ``doc["messages"][0]["content"]`` are
    rewritten as lettered options (A/B/C), and an English instruction asking
    for a single-letter answer is appended after the third option.

    Args:
        doc: Mapping with a ``"messages"`` sequence whose first element has a
            ``"content"`` string.

    Returns:
        The rewritten prompt string. Text without the dash-prefixed labels is
        returned unchanged.
    """
    prompt = doc["messages"][0]["content"]
    prompt = prompt.replace("-سلبي", "A. سلبي")
    prompt = prompt.replace("-ايجابي", "B. ايجابي")
    prompt = prompt.replace(
        "-ماكينش إحساس",
        "C. ماكينش إحساس\nThe answer should be strictly one letter of the following: A, B, C.",
    )
    return prompt
def doc_to_choice_3(doc):
    """Return the full list of answer letters held in ``alpha``.

    *doc* is accepted for signature compatibility but is not read.
    """
    return alpha
def doc_to_choice_2(doc):
    """Return only the first two answer letters from ``alpha``.

    *doc* is accepted for signature compatibility but is not read.
    """
    return alpha[:2]
def doc_to_target(doc):
    """Return the answer letter for the gold label of *doc*.

    The label is read from ``doc["messages"][1]["content"]``, mapped to an
    index through ``out_dic``, and that index is looked up in ``alpha``.

    Raises:
        KeyError: If the label string is not a key of ``out_dic``.
    """
    label = doc["messages"][1]["content"]
    return alpha[out_dic[label]]
import evaluate
import datasets import datasets
import evaluate
def strip(resps, docs): def strip(resps, docs):
""" """
...@@ -9,49 +10,71 @@ def strip(resps, docs): ...@@ -9,49 +10,71 @@ def strip(resps, docs):
def doc_to_text(doc):
    """Return the first message of *doc* with a length hint added.

    Every occurrence of the instruction phrase in
    ``doc["messages"][0]["content"]`` is replaced by the same phrase followed
    by a suffix containing the Arabic-Indic numeral ٣٠ (30).
    NOTE(review): the suffix presumably asks for a 30-word summary — confirm
    with a Darija speaker.
    """
    source_text = doc["messages"][0]["content"]
    return source_text.replace("لخص هاد المقطع", "لخص هاد المقطع في ٣٠ كلمة")
def doc_to_target(doc):
    """Return the reference text, i.e. ``doc["messages"][1]["content"]``."""
    return doc["messages"][1]["content"]
def bert(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample passthrough so the real scoring
    happens in an aggregation function — confirm against the task config.
    """
    return items
def Average(lst):
    """Return the arithmetic mean of the numbers in *lst*.

    Raises:
        ZeroDivisionError: If *lst* is empty.
    """
    return sum(lst) / len(lst)
def darijabert(items):
    """Return the mean BERTScore F1 over *items*, scored with DarijaBERT.

    Args:
        items: Non-empty iterable of ``(prediction, reference)`` pairs.

    Returns:
        The average of the per-pair ``"f1"`` values returned by the
        ``bertscore`` metric.
    """
    bert_model = "SI2M-Lab/DarijaBERT"
    bert_score = evaluate.load("bertscore")
    predictions, references = zip(*items)
    # Named `scores` rather than `bert` so the local does not shadow the
    # module-level `bert` function.
    scores = bert_score.compute(
        predictions=predictions,
        references=references,
        model_type=bert_model,
        num_layers=12,
    )
    return Average(scores["f1"])
def rouge1(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by an aggregation function — confirm against the task config.
    """
    return items
def rougeL(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by an aggregation function — confirm against the task config.
    """
    return items
def rouge2(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by an aggregation function — confirm against the task config.
    """
    return items
def rougeLsum(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by an aggregation function — confirm against the task config.
    """
    return items
def agg_rougelsum(items):
    """Return the ``"rougeLsum"`` score computed over all *items*.

    Args:
        items: Non-empty iterable of ``(prediction, reference)`` pairs.
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    scores = rouge.compute(predictions=predictions, references=references)
    return scores["rougeLsum"]
def agg_rouge1(items):
    """Return the ``"rouge1"`` score computed over all *items*.

    Args:
        items: Non-empty iterable of ``(prediction, reference)`` pairs.
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    scores = rouge.compute(predictions=predictions, references=references)
    return scores["rouge1"]
def agg_rouge2(items):
    """Return the ``"rouge2"`` score computed over all *items*.

    Args:
        items: Non-empty iterable of ``(prediction, reference)`` pairs.
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    scores = rouge.compute(predictions=predictions, references=references)
    return scores["rouge2"]
def agg_rougel(items): def agg_rougel(items):
rouge = evaluate.load("rouge") rouge = evaluate.load("rouge")
predictions, references = zip(*items) predictions, references = zip(*items)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment