"include/vscode:/vscode.git/clone" did not exist on "0dc982907897fcad0182f69a9489e309aef4ebf0"
Unverified Commit ac0bc1df authored by Baber Abbasi's avatar Baber Abbasi Committed by GitHub
Browse files

leaderboard - add subtask scores (#2867)

* add subtask scores

* pacify pre-commit
parent 6cc41d34
...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug ...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
``` ```
@article{shang2024atlaschatadaptinglargelanguage, @article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect}, title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing}, author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024}, year={2024},
eprint={2409.17912}, eprint={2409.17912},
archivePrefix={arXiv}, archivePrefix={arXiv},
primaryClass={cs.CL}, primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912}, url={https://arxiv.org/abs/2409.17912},
} }
``` ```
......
...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug ...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
``` ```
@article{shang2024atlaschatadaptinglargelanguage, @article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect}, title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing}, author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024}, year={2024},
eprint={2409.17912}, eprint={2409.17912},
archivePrefix={arXiv}, archivePrefix={arXiv},
primaryClass={cs.CL}, primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912}, url={https://arxiv.org/abs/2409.17912},
} }
``` ```
......
...@@ -4,4 +4,4 @@ test_split: electro_maroc ...@@ -4,4 +4,4 @@ test_split: electro_maroc
- "darija_sentiment_tasks" - "darija_sentiment_tasks"
"task": "darija_sentiment_electrom" "task": "darija_sentiment_electrom"
"task_alias": "Electro Maroc" "task_alias": "Electro Maroc"
doc_to_choice: !function utils.doc_to_choice_2 doc_to_choice: !function utils.doc_to_choice_2
\ No newline at end of file
...@@ -4,4 +4,4 @@ test_split: mac ...@@ -4,4 +4,4 @@ test_split: mac
- "darija_sentiment_tasks" - "darija_sentiment_tasks"
"task": "darija_sentiment_mac" "task": "darija_sentiment_mac"
"task_alias": "MAC" "task_alias": "MAC"
doc_to_choice: !function utils.doc_to_choice_3 doc_to_choice: !function utils.doc_to_choice_3
\ No newline at end of file
...@@ -4,4 +4,4 @@ test_split: msac ...@@ -4,4 +4,4 @@ test_split: msac
- "darija_sentiment_tasks" - "darija_sentiment_tasks"
"task": "darija_sentiment_msac" "task": "darija_sentiment_msac"
"task_alias": "MSAC" "task_alias": "MSAC"
doc_to_choice: !function utils.doc_to_choice_2 doc_to_choice: !function utils.doc_to_choice_2
\ No newline at end of file
...@@ -4,4 +4,4 @@ test_split: msda ...@@ -4,4 +4,4 @@ test_split: msda
- "darija_sentiment_tasks" - "darija_sentiment_tasks"
"task": "darija_sentiment_msda" "task": "darija_sentiment_msda"
"task_alias": "MSDA" "task_alias": "MSDA"
doc_to_choice: !function utils.doc_to_choice_3 doc_to_choice: !function utils.doc_to_choice_3
\ No newline at end of file
...@@ -4,4 +4,4 @@ test_split: myc ...@@ -4,4 +4,4 @@ test_split: myc
- "darija_sentiment_tasks" - "darija_sentiment_tasks"
"task": "darija_sentiment_myc" "task": "darija_sentiment_myc"
"task_alias": "MYC" "task_alias": "MYC"
doc_to_choice: !function utils.doc_to_choice_2 doc_to_choice: !function utils.doc_to_choice_2
\ No newline at end of file
from lm_eval.api.filter import Filter from lm_eval.api.filter import Filter
from lm_eval.api.registry import register_filter from lm_eval.api.registry import register_filter
alpha = ['A', 'B', 'C']
alpha = ["A", "B", "C"]
out_dic = {"ايجابي": 1, "سلبي": 0, "ماكينش إحساس": 2} out_dic = {"ايجابي": 1, "سلبي": 0, "ماكينش إحساس": 2}
def doc_to_text(doc): def doc_to_text(doc):
return doc["messages"][0]["content"].replace('-سلبي', 'A. سلبي').replace('-ايجابي', 'B. ايجابي').replace('-ماكينش إحساس', 'C. ماكينش إحساس\nThe answer should be strictly one letter of the following: A, B, C.')#.replace('شنو هو الإحساس ديال هاد الجملة؟', 'شنو هو الإحساس ديال هاد الجملة؟') return (
doc["messages"][0]["content"]
.replace("-سلبي", "A. سلبي")
.replace("-ايجابي", "B. ايجابي")
.replace(
"-ماكينش إحساس",
"C. ماكينش إحساس\nThe answer should be strictly one letter of the following: A, B, C.",
)
) # .replace('شنو هو الإحساس ديال هاد الجملة؟', 'شنو هو الإحساس ديال هاد الجملة؟')
def doc_to_choice_3(doc): def doc_to_choice_3(doc):
return alpha return alpha
def doc_to_choice_2(doc): def doc_to_choice_2(doc):
return alpha[:2] return alpha[:2]
def doc_to_target(doc): def doc_to_target(doc):
return alpha[out_dic[doc["messages"][1]["content"]]] return alpha[out_dic[doc["messages"][1]["content"]]]
...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug ...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
``` ```
@article{shang2024atlaschatadaptinglargelanguage, @article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect}, title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing}, author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024}, year={2024},
eprint={2409.17912}, eprint={2409.17912},
archivePrefix={arXiv}, archivePrefix={arXiv},
primaryClass={cs.CL}, primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912}, url={https://arxiv.org/abs/2409.17912},
} }
``` ```
......
import evaluate
import datasets import datasets
import evaluate
def strip(resps, docs): def strip(resps, docs):
""" """
...@@ -9,50 +10,72 @@ def strip(resps, docs): ...@@ -9,50 +10,72 @@ def strip(resps, docs):
def doc_to_text(doc): def doc_to_text(doc):
doc_text = doc["messages"][0]["content"].replace("لخص هاد المقطع", "لخص هاد المقطع في ٣٠ كلمة") doc_text = doc["messages"][0]["content"].replace(
"لخص هاد المقطع", "لخص هاد المقطع في ٣٠ كلمة"
)
return doc_text return doc_text
def doc_to_target(doc): def doc_to_target(doc):
return doc["messages"][1]["content"] return doc["messages"][1]["content"]
def bert(items): def bert(items):
return items return items
def Average(lst): def Average(lst):
return sum(lst) / len(lst) return sum(lst) / len(lst)
def darijabert(items): def darijabert(items):
bert_model = 'SI2M-Lab/DarijaBERT' bert_model = "SI2M-Lab/DarijaBERT"
bert_score = evaluate.load("bertscore") bert_score = evaluate.load("bertscore")
predictions, references = zip(*items) predictions, references = zip(*items)
bert = bert_score.compute(predictions=predictions, references=references, model_type=bert_model, num_layers=12) bert = bert_score.compute(
return Average(bert['f1']) predictions=predictions,
references=references,
model_type=bert_model,
num_layers=12,
)
return Average(bert["f1"])
def rouge1(items): def rouge1(items):
return items return items
def rougeL(items): def rougeL(items):
return items return items
def rouge2(items): def rouge2(items):
return items return items
def rougeLsum(items): def rougeLsum(items):
return items return items
def agg_rougelsum(items): def agg_rougelsum(items):
rouge = evaluate.load("rouge") rouge = evaluate.load("rouge")
predictions, references = zip(*items) predictions, references = zip(*items)
return rouge.compute(predictions=predictions, references=references)["rougeLsum"] return rouge.compute(predictions=predictions, references=references)["rougeLsum"]
def agg_rouge1(items): def agg_rouge1(items):
rouge = evaluate.load("rouge") rouge = evaluate.load("rouge")
predictions, references = zip(*items) predictions, references = zip(*items)
return rouge.compute(predictions=predictions, references=references)["rouge1"] return rouge.compute(predictions=predictions, references=references)["rouge1"]
def agg_rouge2(items): def agg_rouge2(items):
rouge = evaluate.load("rouge") rouge = evaluate.load("rouge")
predictions, references = zip(*items) predictions, references = zip(*items)
return rouge.compute(predictions=predictions, references=references)["rouge2"] return rouge.compute(predictions=predictions, references=references)["rouge2"]
def agg_rougel(items): def agg_rougel(items):
rouge = evaluate.load("rouge") rouge = evaluate.load("rouge")
predictions, references = zip(*items) predictions, references = zip(*items)
return rouge.compute(predictions=predictions, references=references)["rougeL"] return rouge.compute(predictions=predictions, references=references)["rougeL"]
\ No newline at end of file
...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug ...@@ -16,13 +16,13 @@ Homepage: [https://huggingface.co/datasets/MBZUAI-Paris/DarijaBench](https://hug
``` ```
@article{shang2024atlaschatadaptinglargelanguage, @article{shang2024atlaschatadaptinglargelanguage,
title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect}, title={Atlas-Chat: Adapting Large Language Models for Low-Resource Moroccan Arabic Dialect},
author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing}, author={Guokan Shang and Hadi Abdine and Yousef Khoubrane and Amr Mohamed and Yassine Abbahaddou and Sofiane Ennadir and Imane Momayiz and Xuguang Ren and Eric Moulines and Preslav Nakov and Michalis Vazirgiannis and Eric Xing},
year={2024}, year={2024},
eprint={2409.17912}, eprint={2409.17912},
archivePrefix={arXiv}, archivePrefix={arXiv},
primaryClass={cs.CL}, primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.17912}, url={https://arxiv.org/abs/2409.17912},
} }
``` ```
......
test_split: doda test_split: doda
\ No newline at end of file
...@@ -9,4 +9,4 @@ include: ...@@ -9,4 +9,4 @@ include:
metric_list: metric_list:
- metric: !function utils.bert - metric: !function utils.bert
aggregation: !function utils.bertbase aggregation: !function utils.bertbase
higher_is_better: true higher_is_better: true
\ No newline at end of file
...@@ -9,4 +9,4 @@ include: ...@@ -9,4 +9,4 @@ include:
metric_list: metric_list:
- metric: !function utils.bert - metric: !function utils.bert
aggregation: !function utils.camembert aggregation: !function utils.camembert
higher_is_better: true higher_is_better: true
\ No newline at end of file
...@@ -9,4 +9,4 @@ include: ...@@ -9,4 +9,4 @@ include:
metric_list: metric_list:
- metric: !function utils.bert - metric: !function utils.bert
aggregation: !function utils.arabert aggregation: !function utils.arabert
higher_is_better: true higher_is_better: true
\ No newline at end of file
...@@ -9,4 +9,4 @@ include: ...@@ -9,4 +9,4 @@ include:
metric_list: metric_list:
- metric: !function utils.bert - metric: !function utils.bert
aggregation: !function utils.darijabert aggregation: !function utils.darijabert
higher_is_better: true higher_is_better: true
\ No newline at end of file
...@@ -9,4 +9,4 @@ include: ...@@ -9,4 +9,4 @@ include:
metric_list: metric_list:
- metric: !function utils.bert - metric: !function utils.bert
aggregation: !function utils.darijabert aggregation: !function utils.darijabert
higher_is_better: true higher_is_better: true
\ No newline at end of file
test_split: flores_plus test_split: flores_plus
\ No newline at end of file
...@@ -9,4 +9,4 @@ include: ...@@ -9,4 +9,4 @@ include:
metric_list: metric_list:
- metric: !function utils.bert - metric: !function utils.bert
aggregation: !function utils.bertbase aggregation: !function utils.bertbase
higher_is_better: true higher_is_better: true
\ No newline at end of file
...@@ -9,4 +9,4 @@ include: ...@@ -9,4 +9,4 @@ include:
metric_list: metric_list:
- metric: !function utils.bert - metric: !function utils.bert
aggregation: !function utils.camembert aggregation: !function utils.camembert
higher_is_better: true higher_is_better: true
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment