Commit 4eecbabb authored by Baber's avatar Baber
Browse files

Merge branch 'main' into prefill

parents dac8b534 fb963f0f
group: arabic_leaderboard_arabic_mt_arc_easy_light
task:
- arabic_mt_arc_easy_light
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
task: arabic_mt_arc_easy_light
dataset_path: arcee-globe/AlGhafa-Arabic-LLM-Benchmark-Translated-10percent
dataset_name: arc_easy_ar
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
fewshot_split: validation
fewshot_config:
sampler: first_n
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
import datasets
import numpy as np
def process_docs(dataset: datasets.Dataset):
def _process_doc(doc):
question = doc["query"]
answer_index = int(doc["label"])
# Dynamically determining the choices by excluding '__few_shots', 'query' and 'label'
choices_keys = [
key for key in doc.keys() if key not in ["query", "label", "__few_shots"]
]
choices = [doc[key] for key in choices_keys]
instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
query = f"{instruction}السؤال: {question}\n"
for index, choice in enumerate(choices):
query += f"{index}) {choice}\n"
query += "الإجابة:"
return {"query": query, "choices": choices, "gold": answer_index}
return dataset.map(_process_doc)
group: arabic_leaderboard_arabic_mt_boolq_light
task:
- arabic_mt_boolq_light
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
task: arabic_mt_boolq_light
dataset_path: arcee-globe/AlGhafa-Arabic-LLM-Benchmark-Translated-10percent
dataset_name: boolq_ar
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
fewshot_split: validation
fewshot_config:
sampler: first_n
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
import datasets
import numpy as np
def process_docs(dataset: datasets.Dataset):
def _process_doc(doc):
question = doc["question"]
passage = doc["passage"]
instruction = "بناء على المقطع التالي، أجب عن السؤال ب نعم أو لا"
query = f"""{instruction}
المقطع :
{passage}
السؤال:
{question}
الإجابة:
"""
return {
"query": query,
"choices": ["نعم", "لا"],
"gold": 0 if doc["answer"] else 1,
}
return dataset.map(_process_doc)
task: arabic_mt_copa_light
dataset_path: arcee-globe/AlGhafa-Arabic-LLM-Benchmark-Translated-10percent
dataset_name: copa_ext_ar
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
fewshot_split: validation
fewshot_config:
sampler: first_n
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
group: arabic_leaderboard_arabic_mt_copa_light
task:
- arabic_mt_copa_light
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
import datasets
import numpy as np
def process_docs(dataset: datasets.Dataset):
def _process_doc(doc):
premise = doc["premise"]
choices = [doc["choice1"], doc["choice2"]]
question_map = {"cause": "لأن", "effect": "لذلك"}
question = question_map[doc["question"]]
answer = doc["label"]
query = "{}، {} :\n0) {}\n1) {}\nالإجابة:".format(
premise, question, choices[0], choices[1]
)
return {"query": query, "choices": choices, "gold": answer}
return dataset.map(_process_doc)
group: arabic_leaderboard_arabic_mt_hellaswag_light
task:
- arabic_mt_hellaswag_light
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
task: arabic_mt_hellaswag_light
dataset_path: arcee-globe/AlGhafa-Arabic-LLM-Benchmark-Translated-10percent
dataset_name: hellaswag_okapi_ar
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
fewshot_split: validation
fewshot_config:
sampler: first_n
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
import re
import datasets
import numpy as np
def process_docs(dataset: datasets.Dataset):
def _process_doc(doc):
ctx = re.sub(r"\[.*?\]", "", doc["ctx"]) # Remove latin words within brackets
endings = [
re.sub(r"\[.*?\]", "", e) for e in eval(doc["endings"])
] # endings is a string representation of a list
answer_index = doc["label"]
instruction = (
"بناء على السياق التالي، اختر النهاية الصحيحة من الاقتراحات التالية"
)
query = f"""{instruction}
السياق:
{ctx}
الاقتراحات:
"""
for i, ending in enumerate(endings):
query += f"{i}) {ending}\n"
query += "الإجابة:"
return {"query": query, "choices": endings, "gold": answer_index}
return dataset.map(_process_doc)
group: arabic_leaderboard_arabic_mt_mmlu_light
task:
- arabic_mt_mmlu_light
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
task: arabic_mt_mmlu_light
dataset_path: arcee-globe/AlGhafa-Arabic-LLM-Benchmark-Translated-10percent
dataset_name: mmlu_okapi_ar
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
fewshot_split: validation
fewshot_config:
sampler: first_n
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
import datasets
import numpy as np
def process_docs(dataset: datasets.Dataset):
def _process_doc(doc):
question = doc["query"]
answer_index = int(doc["label"])
# Dynamically determining the choices by excluding '__few_shots', 'query' and 'label'
choices_keys = [
key for key in doc.keys() if key not in ["query", "label", "__few_shots"]
]
choices = [doc[key] for key in choices_keys]
instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
query = f"{instruction}السؤال: {question}\n"
for index, choice in enumerate(choices):
query += f"{index}) {choice}\n"
query += "الإجابة:"
return {"query": query, "choices": choices, "gold": answer_index}
return dataset.map(_process_doc)
group: arabic_leaderboard_arabic_mt_openbook_qa_light
task:
- arabic_mt_openbook_qa_light
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
task: arabic_mt_openbook_qa_light
dataset_path: arcee-globe/AlGhafa-Arabic-LLM-Benchmark-Translated-10percent
dataset_name: openbook_qa_ext_ar
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
fewshot_split: validation
fewshot_config:
sampler: first_n
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
import datasets
import numpy as np
def process_docs(dataset: datasets.Dataset):
def _process_doc(doc):
question = doc["query"]
answer_index = int(doc["label"])
# Dynamically determining the choices by excluding '__few_shots', 'query' and 'label'
choices_keys = [
key for key in doc.keys() if key not in ["query", "label", "__few_shots"]
]
choices = [doc[key] for key in choices_keys]
instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
query = f"{instruction}السؤال: {question}\n"
for index, choice in enumerate(choices):
query += f"{index}) {choice}\n"
query += "الإجابة:"
return {"query": query, "choices": choices, "gold": answer_index}
return dataset.map(_process_doc)
group: arabic_leaderboard_arabic_mt_piqa_light
task:
- arabic_mt_piqa_light
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
task: arabic_mt_piqa_light
dataset_path: arcee-globe/AlGhafa-Arabic-LLM-Benchmark-Translated-10percent
dataset_name: piqa_ar
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
fewshot_split: validation
fewshot_config:
sampler: first_n
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment