Unverified Commit ac0bc1df authored by Baber Abbasi's avatar Baber Abbasi Committed by GitHub
Browse files

leaderboard - add subtask scores (#2867)

* add subtask scores

* pacify pre-commit
parent 6cc41d34
......@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
```
@article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024},
eprint={2409.17912},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912},
url={https://arxiv.org/abs/2409.17912},
}
```
......
......@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
```
@article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024},
eprint={2409.17912},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912},
url={https://arxiv.org/abs/2409.17912},
}
```
......
......@@ -4,4 +4,4 @@ test_split: electro_maroc
- "darija_sentiment_tasks"
"task": "darija_sentiment_electrom"
"task_alias": "Electro Maroc"
doc_to_choice: !function utils.doc_to_choice_2
\ No newline at end of file
doc_to_choice: !function utils.doc_to_choice_2
......@@ -4,4 +4,4 @@ test_split: mac
- "darija_sentiment_tasks"
"task": "darija_sentiment_mac"
"task_alias": "MAC"
doc_to_choice: !function utils.doc_to_choice_3
\ No newline at end of file
doc_to_choice: !function utils.doc_to_choice_3
......@@ -4,4 +4,4 @@ test_split: msac
- "darija_sentiment_tasks"
"task": "darija_sentiment_msac"
"task_alias": "MSAC"
doc_to_choice: !function utils.doc_to_choice_2
\ No newline at end of file
doc_to_choice: !function utils.doc_to_choice_2
......@@ -4,4 +4,4 @@ test_split: msda
- "darija_sentiment_tasks"
"task": "darija_sentiment_msda"
"task_alias": "MSDA"
doc_to_choice: !function utils.doc_to_choice_3
\ No newline at end of file
doc_to_choice: !function utils.doc_to_choice_3
......@@ -4,4 +4,4 @@ test_split: myc
- "darija_sentiment_tasks"
"task": "darija_sentiment_myc"
"task_alias": "MYC"
doc_to_choice: !function utils.doc_to_choice_2
\ No newline at end of file
doc_to_choice: !function utils.doc_to_choice_2
from lm_eval.api.filter import Filter
from lm_eval.api.registry import register_filter
# Answer letters offered to the model, in choice order.
alpha = ["A", "B", "C"]
# Maps the gold label text found in the dataset to its index in `alpha`:
# "سلبي" -> 0 ("A"), "ايجابي" -> 1 ("B"), "ماكينش إحساس" -> 2 ("C").
out_dic = {"ايجابي": 1, "سلبي": 0, "ماكينش إحساس": 2}
def doc_to_text(doc):
    """Build the lettered multiple-choice prompt for a sentiment document.

    Takes the content of the first message and rewrites each dash-prefixed
    answer option as a lettered choice (A, B, C).

    Args:
        doc: Mapping with a ``"messages"`` list whose first entry has a
            ``"content"`` string.

    Returns:
        The prompt text with the options relabelled.
    """
    prompt = doc["messages"][0]["content"]
    # Applied in this order. The English answer-format instruction rides on
    # the third option, so it is only appended when that option is present.
    relabel = (
        ("-سلبي", "A. سلبي"),
        ("-ايجابي", "B. ايجابي"),
        (
            "-ماكينش إحساس",
            "C. ماكينش إحساس\nThe answer should be strictly one letter of the following: A, B, C.",
        ),
    )
    for old, new in relabel:
        prompt = prompt.replace(old, new)
    return prompt
def doc_to_choice_3(doc):
    """Return the three answer letters for a three-way sentiment task.

    Args:
        doc: The dataset document (unused; the choices are fixed).

    Returns:
        A new list with every letter in ``alpha``.
    """
    # Copy so callers cannot mutate the shared module-level list; this mirrors
    # doc_to_choice_2, whose slice already yields a fresh list.
    return list(alpha)
def doc_to_choice_2(doc):
    """Return the two answer letters for a binary sentiment task.

    Args:
        doc: The dataset document (unused; the choices are fixed).

    Returns:
        A new list with the first two letters of ``alpha``.
    """
    return alpha[:2]
def doc_to_target(doc):
    """Return the gold answer letter for a sentiment document.

    Args:
        doc: Mapping with a ``"messages"`` list whose second entry has a
            ``"content"`` string holding the gold label text.

    Returns:
        The letter from ``alpha`` that corresponds to the gold label.

    Raises:
        KeyError: If the label text is not a key of ``out_dic``.
    """
    gold_label = doc["messages"][1]["content"]
    return alpha[out_dic[gold_label]]
......@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
```
@article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024},
eprint={2409.17912},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912},
url={https://arxiv.org/abs/2409.17912},
}
```
......
import evaluate
import datasets
import evaluate
def strip(resps, docs):
"""
......@@ -9,50 +10,72 @@ def strip(resps, docs):
def doc_to_text(doc):
    """Build the summarization prompt for a document.

    Takes the content of the first message and extends the instruction
    phrase "لخص هاد المقطع" with the suffix "في ٣٠ كلمة" wherever it occurs.

    Args:
        doc: Mapping with a ``"messages"`` list whose first entry has a
            ``"content"`` string.

    Returns:
        The prompt text with the extended instruction.
    """
    doc_text = doc["messages"][0]["content"].replace(
        "لخص هاد المقطع", "لخص هاد المقطع في ٣٠ كلمة"
    )
    return doc_text
def doc_to_target(doc):
    """Return the reference text for a document.

    Args:
        doc: Mapping with a ``"messages"`` list whose second entry has a
            ``"content"`` string.

    Returns:
        The content of the second message, unchanged.
    """
    return doc["messages"][1]["content"]
def bert(items):
    """Pass-through metric: return ``items`` unchanged.

    The task configs reference this as ``metric`` alongside a separate
    ``aggregation`` function (e.g. ``darijabert``), which does the scoring.
    """
    return items
def Average(lst):
    """Return the arithmetic mean of a sequence of numbers.

    Args:
        lst: Sized collection of numbers.

    Returns:
        ``sum(lst) / len(lst)``.

    Raises:
        ZeroDivisionError: If ``lst`` is empty.
    """
    return sum(lst) / len(lst)
def darijabert(items):
    """Aggregate (prediction, reference) pairs into a mean BERTScore F1.

    Loads the ``bertscore`` metric through ``evaluate`` and scores with the
    ``SI2M-Lab/DarijaBERT`` model at ``num_layers=12``.

    Args:
        items: Iterable of ``(prediction, reference)`` pairs.

    Returns:
        The average of the per-pair F1 scores.

    Raises:
        ValueError: If ``items`` is empty (the pairs cannot be unpacked).
    """
    bert_model = "SI2M-Lab/DarijaBERT"
    bert_score = evaluate.load("bertscore")
    predictions, references = zip(*items)
    # Named `scores` rather than `bert` so the sibling metric function `bert`
    # is not shadowed inside this function.
    scores = bert_score.compute(
        predictions=predictions,
        references=references,
        model_type=bert_model,
        num_layers=12,
    )
    return Average(scores["f1"])
def rouge1(items):
    """Pass-through metric: return ``items`` unchanged.

    NOTE(review): presumably paired with ``agg_rouge1`` as the aggregation in
    the task YAML; confirm against the config.
    """
    return items
def rougeL(items):
    """Pass-through metric: return ``items`` unchanged.

    NOTE(review): presumably paired with ``agg_rougel`` as the aggregation in
    the task YAML; confirm against the config.
    """
    return items
def rouge2(items):
    """Pass-through metric: return ``items`` unchanged.

    NOTE(review): presumably paired with ``agg_rouge2`` as the aggregation in
    the task YAML; confirm against the config.
    """
    return items
def rougeLsum(items):
    """Pass-through metric: return ``items`` unchanged.

    NOTE(review): presumably paired with ``agg_rougelsum`` as the aggregation
    in the task YAML; confirm against the config.
    """
    return items
def _rouge_score(items, rouge_type):
    """Score (prediction, reference) pairs with ROUGE and return one variant.

    Args:
        items: Iterable of ``(prediction, reference)`` pairs.
        rouge_type: Key to read from the metric's result, e.g. ``"rouge1"``.

    Returns:
        The value stored under ``rouge_type`` in the computed result.

    Raises:
        ValueError: If ``items`` is empty (the pairs cannot be unpacked).
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    return rouge.compute(predictions=predictions, references=references)[rouge_type]


def agg_rougelsum(items):
    """Aggregate (prediction, reference) pairs into the ``rougeLsum`` score."""
    return _rouge_score(items, "rougeLsum")


def agg_rouge1(items):
    """Aggregate (prediction, reference) pairs into the ``rouge1`` score."""
    return _rouge_score(items, "rouge1")


def agg_rouge2(items):
    """Aggregate (prediction, reference) pairs into the ``rouge2`` score."""
    return _rouge_score(items, "rouge2")


def agg_rougel(items):
    """Aggregate (prediction, reference) pairs into the ``rougeL`` score."""
    return _rouge_score(items, "rougeL")
......@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
```
@article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024},
eprint={2409.17912},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912},
url={https://arxiv.org/abs/2409.17912},
}
```
......
test_split: doda
\ No newline at end of file
test_split: doda
......@@ -9,4 +9,4 @@ include:
metric_list:
- metric: !function utils.bert
aggregation: !function utils.bertbase
higher_is_better: true
\ No newline at end of file
higher_is_better: true
......@@ -9,4 +9,4 @@ include:
metric_list:
- metric: !function utils.bert
aggregation: !function utils.camembert
higher_is_better: true
\ No newline at end of file
higher_is_better: true
......@@ -9,4 +9,4 @@ include:
metric_list:
- metric: !function utils.bert
aggregation: !function utils.arabert
higher_is_better: true
\ No newline at end of file
higher_is_better: true
......@@ -9,4 +9,4 @@ include:
metric_list:
- metric: !function utils.bert
aggregation: !function utils.darijabert
higher_is_better: true
\ No newline at end of file
higher_is_better: true
......@@ -9,4 +9,4 @@ include:
metric_list:
- metric: !function utils.bert
aggregation: !function utils.darijabert
higher_is_better: true
\ No newline at end of file
higher_is_better: true
test_split: flores_plus
\ No newline at end of file
test_split: flores_plus
......@@ -9,4 +9,4 @@ include:
metric_list:
- metric: !function utils.bert
aggregation: !function utils.bertbase
higher_is_better: true
\ No newline at end of file
higher_is_better: true
......@@ -9,4 +9,4 @@ include:
metric_list:
- metric: !function utils.bert
aggregation: !function utils.camembert
higher_is_better: true
\ No newline at end of file
higher_is_better: true
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment