Unverified Commit ac0bc1df authored by Baber Abbasi's avatar Baber Abbasi Committed by GitHub
Browse files

leaderboard - add subtask scores (#2867)

* add subtask scores

* pacify pre-commit
parent 6cc41d34
from lm_eval.api.filter import Filter
from lm_eval.api.registry import register_filter
# Answer letters offered to the model, in option order.
alpha = list("ABC")

# Maps the gold sentiment label text to its index in ``alpha``.
out_dic = {
    "ايجابي": 1,
    "سلبي": 0,
    "ماكينش إحساس": 2,
}
def doc_to_text(doc):
    """Build the lettered multiple-choice prompt from the first message of *doc*.

    The dash-prefixed sentiment options found in the prompt text are
    relabelled as ``A.``/``B.``/``C.`` choices, and an English instruction
    asking for a single-letter answer is appended to the third option.
    Text that contains none of the dash-prefixed options is returned
    unchanged.

    Args:
        doc: Mapping where ``doc["messages"][0]["content"]`` is the prompt string.

    Returns:
        The rewritten prompt string.
    """
    return (
        doc["messages"][0]["content"]
        .replace("-سلبي", "A. سلبي")
        .replace("-ايجابي", "B. ايجابي")
        .replace(
            "-ماكينش إحساس",
            "C. ماكينش إحساس\nThe answer should be strictly one letter of the following: A, B, C.",
        )
    )
def doc_to_choice_3(doc):
    """Return the three answer letters for a three-way task.

    Args:
        doc: The dataset record; it is not inspected.

    Returns:
        The module-level ``alpha`` list itself (not a copy).
    """
    return alpha
def doc_to_choice_2(doc):
    """Return the first two answer letters for a two-way task.

    Args:
        doc: The dataset record; it is not inspected.

    Returns:
        A new list holding the first two entries of the module-level ``alpha``.
    """
    return alpha[:2]
def doc_to_target(doc):
    """Return the answer letter matching the gold label of *doc*.

    The label text is read from the second message, converted to an index
    through ``out_dic``, and that index selects the letter from ``alpha``.

    Args:
        doc: Mapping where ``doc["messages"][1]["content"]`` is the label text.

    Returns:
        The answer letter for the label.

    Raises:
        KeyError: If the label text is not a key of ``out_dic``.
    """
    label = doc["messages"][1]["content"]
    return alpha[out_dic[label]]
import evaluate
import datasets
import evaluate
def strip(resps, docs):
"""
......@@ -9,49 +10,71 @@ def strip(resps, docs):
def doc_to_text(doc):
    """Build the summarization prompt from the first message of *doc*.

    Every occurrence of the instruction phrase in the prompt is replaced by
    a longer variant of the same phrase (the variant extends it with a
    30-word limit). Text without the phrase is returned unchanged.

    Args:
        doc: Mapping where ``doc["messages"][0]["content"]`` is the prompt string.

    Returns:
        The rewritten prompt string.
    """
    return doc["messages"][0]["content"].replace(
        "لخص هاد المقطع", "لخص هاد المقطع في ٣٠ كلمة"
    )
def doc_to_target(doc):
    """Return the reference text stored in the second message of *doc*.

    Args:
        doc: Mapping where ``doc["messages"][1]["content"]`` is the reference.

    Returns:
        The content of the second message, unmodified.
    """
    return doc["messages"][1]["content"]
def bert(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample pass-through so that scoring is
    deferred to an aggregation function — confirm against the task config.
    """
    return items
def Average(lst):
    """Return the arithmetic mean of *lst*.

    Args:
        lst: A sized collection of numbers.

    Returns:
        ``sum(lst) / len(lst)``.

    Raises:
        ZeroDivisionError: If *lst* is empty.
    """
    return sum(lst) / len(lst)
def darijabert(items):
    """Aggregate (prediction, reference) pairs into a mean BERTScore F1.

    Loads the ``bertscore`` metric through ``evaluate``, computes it with the
    ``SI2M-Lab/DarijaBERT`` model and ``num_layers=12``, and averages the
    per-pair ``f1`` values with ``Average``.

    Args:
        items: Iterable of ``(prediction, reference)`` pairs.

    Returns:
        The mean of the ``f1`` scores returned by the metric.
    """
    bert_model = "SI2M-Lab/DarijaBERT"
    bert_score = evaluate.load("bertscore")
    predictions, references = zip(*items)
    # Named ``scores`` so the local does not shadow the module-level ``bert``.
    scores = bert_score.compute(
        predictions=predictions,
        references=references,
        model_type=bert_model,
        num_layers=12,
    )
    return Average(scores["f1"])
def rouge1(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample pass-through whose output is
    scored later by ``agg_rouge1`` — confirm against the task config.
    """
    return items
def rougeL(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample pass-through whose output is
    scored later by an aggregation function — confirm against the task config.
    """
    return items
def rouge2(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample pass-through whose output is
    scored later by ``agg_rouge2`` — confirm against the task config.
    """
    return items
def rougeLsum(items):
    """Return *items* unchanged.

    NOTE(review): presumably a per-sample pass-through whose output is
    scored later by ``agg_rougelsum`` — confirm against the task config.
    """
    return items
def agg_rougelsum(items):
    """Aggregate (prediction, reference) pairs into a ROUGE-Lsum score.

    Args:
        items: Iterable of ``(prediction, reference)`` pairs.

    Returns:
        The ``"rougeLsum"`` entry of the result computed by the ``rouge``
        metric loaded through ``evaluate``.
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    result = rouge.compute(predictions=predictions, references=references)
    return result["rougeLsum"]
def agg_rouge1(items):
    """Aggregate (prediction, reference) pairs into a ROUGE-1 score.

    Args:
        items: Iterable of ``(prediction, reference)`` pairs.

    Returns:
        The ``"rouge1"`` entry of the result computed by the ``rouge``
        metric loaded through ``evaluate``.
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    result = rouge.compute(predictions=predictions, references=references)
    return result["rouge1"]
def agg_rouge2(items):
    """Aggregate (prediction, reference) pairs into a ROUGE-2 score.

    Args:
        items: Iterable of ``(prediction, reference)`` pairs.

    Returns:
        The ``"rouge2"`` entry of the result computed by the ``rouge``
        metric loaded through ``evaluate``.
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    result = rouge.compute(predictions=predictions, references=references)
    return result["rouge2"]
def agg_rougel(items):
rouge = evaluate.load("rouge")
predictions, references = zip(*items)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment