Commit 89b6bdb3 authored by Baber

Merge branch 'main' into ai2d

parents 59053d58 144a1e58
en_answer_mapping = {"true": "yes", "false": "no", True: "yes", False: "no"}


def process_docs(dataset):
    def remove_question_mark(text):
        text = text.strip()
        if text.endswith("?") or text.endswith("؟"):
            text = text[:-1]
            text = text.strip()
        return text

    def _helper(doc):
        doc["question"] = remove_question_mark(doc["question"])
        doc["target"] = en_answer_mapping[doc["answer"]]
        return doc

    return dataset.map(_helper)
task: AraDiCE_boolq_lev
dataset_path: QCRI/AraDiCE-BoolQ
dataset_name: BoolQ-lev
output_type: multiple_choice
training_split: null
validation_split: null
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{passage}}\nسؤال: {{question}}؟\nجواب:"
doc_to_target: target
doc_to_choice: ["لا", "نعم"]
should_decontaminate: true
doc_to_decontamination_query: passage
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
from sklearn.metrics import f1_score


def macro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="macro")
    return fscore


def micro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="micro")
    return fscore


def weighted_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="weighted")
    return fscore
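For reference, these aggregation functions expect `items` to be a list of (gold, prediction) pairs collected by the harness; a minimal standalone check (the toy labels below are illustrative, not taken from the dataset):

# Toy example: each item is a (gold, prediction) pair.
items = [("نعم", "نعم"), ("لا", "نعم"), ("لا", "لا")]

print(micro_f1_score(items))     # micro F1 equals accuracy here: 2 of 3 correct
print(macro_f1_score(items))     # unweighted mean of per-label F1 scores
print(weighted_f1_score(items))  # per-label F1 weighted by label support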
lev_answer_mapping = {"true": "نعم", "false": "لا", True: "نعم", False: "لا"}


def process_docs(dataset):
    def remove_question_mark(text):
        text = text.strip()
        if text.endswith("?") or text.endswith("؟"):
            text = text[:-1]
            text = text.strip()
        return text

    def _helper(doc):
        doc["question"] = remove_question_mark(doc["question"])
        doc["target"] = lev_answer_mapping[doc["answer"]]
        return doc

    return dataset.map(_helper)
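A minimal sketch of how this preprocessing can be exercised outside the harness, assuming the Hugging Face `datasets` package and the dataset path/config named in the BoolQ-lev YAML above:

from datasets import load_dataset

# Load the test split named in the task config and apply the same mapping
# the harness applies via `process_docs: !function utils.process_docs`.
boolq_lev = load_dataset("QCRI/AraDiCE-BoolQ", "BoolQ-lev", split="test")
processed = process_docs(boolq_lev)

# Each doc now carries a question without a trailing "?"/"؟" and a "target"
# string ("نعم" or "لا") matching the doc_to_choice entries.
print(processed[0]["question"], "->", processed[0]["target"])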
task: AraDiCE_boolq_msa
dataset_path: QCRI/AraDiCE-BoolQ
dataset_name: BoolQ-msa
output_type: multiple_choice
training_split: null
validation_split: null
test_split: test
process_docs: !function utils.process_docs
doc_to_text: "{{passage}}\nسؤال: {{question}}؟\nجواب:"
doc_to_target: target
doc_to_choice: ["لا", "نعم"]
should_decontaminate: true
doc_to_decontamination_query: passage
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
from sklearn.metrics import f1_score


def macro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="macro")
    return fscore


def micro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="micro")
    return fscore


def weighted_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="weighted")
    return fscore
msa_answer_mapping = {"true": "نعم", "false": "لا", True: "نعم", False: "لا"}


def process_docs(dataset):
    def remove_question_mark(text):
        text = text.strip()
        if text.endswith("?") or text.endswith("؟"):
            text = text[:-1]
            text = text.strip()
        return text

    def _helper(doc):
        doc["question"] = remove_question_mark(doc["question"])
        doc["target"] = msa_answer_mapping[doc["answer"]]
        return doc

    return dataset.map(_helper)
task: AraDiCE_egypt_cultural
dataset_path: QCRI/AraDiCE-Culture
dataset_name: Egypt
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "سؤال : {{Question}}\nإجابة :"
doc_to_target: 0
doc_to_choice: choices
should_decontaminate: true
doc_to_decontamination_query: Question
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
task: AraDiCE_jordan_cultural
dataset_path: QCRI/AraDiCE-Culture
dataset_name: Jordan
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "سؤال : {{Question}}\nإجابة :"
doc_to_target: 0
doc_to_choice: choices
should_decontaminate: true
doc_to_decontamination_query: Question
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
task: AraDiCE_lebanon_cultural
dataset_path: QCRI/AraDiCE-Culture
dataset_name: Lebanon
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "سؤال : {{Question}}\nإجابة :"
doc_to_target: 0
doc_to_choice: choices
should_decontaminate: true
doc_to_decontamination_query: Question
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
from sklearn.metrics import f1_score


def macro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="macro")
    return fscore


def micro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="micro")
    return fscore


def weighted_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="weighted")
    return fscore
task: AraDiCE_palestine_cultural
dataset_path: QCRI/AraDiCE-Culture
dataset_name: Palestine
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "سؤال : {{Question}}\nإجابة :"
doc_to_target: 0
doc_to_choice: choices
should_decontaminate: true
doc_to_decontamination_query: Question
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
task: AraDiCE_qatar_cultural
dataset_path: QCRI/AraDiCE-Culture
dataset_name: Qatar
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "سؤال : {{Question}}\nإجابة :"
doc_to_target: 0
doc_to_choice: choices
should_decontaminate: true
doc_to_decontamination_query: Question
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
task: AraDiCE_syria_cultural
dataset_path: QCRI/AraDiCE-Culture
dataset_name: Syria
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "سؤال : {{Question}}\nإجابة :"
doc_to_target: 0
doc_to_choice: choices
should_decontaminate: true
doc_to_decontamination_query: Question
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
def process_docs(dataset):
    def _helper(doc):
        doc["choices"] = [doc["Option A"], doc["Option B"], doc["Option C"]]
        return doc

    return dataset.map(_helper)
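In the cultural task configs above, `doc_to_target: 0` points at the first entry of the `choices` list built by this helper, so the content of the "Option A" column is scored as the gold answer. A toy illustration (field values are made up, not from the dataset):

# Hypothetical document using the column names referenced above.
doc = {
    "Question": "example question",
    "Option A": "gold answer text",
    "Option B": "distractor one",
    "Option C": "distractor two",
}
doc["choices"] = [doc["Option A"], doc["Option B"], doc["Option C"]]
# doc_to_target: 0 -> the harness scores doc["choices"][0], i.e. "Option A".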
from sklearn.metrics import f1_score


def macro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="macro")
    return fscore


def micro_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="micro")
    return fscore


def weighted_f1_score(items):
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="weighted")
    return fscore
task: AraDiCE_openbookqa_egy
dataset_path: QCRI/AraDiCE-OpenBookQA
dataset_name: OBQA-egy
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: "{{question.stem}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
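The `utils.doc_to_text`, `utils.doc_to_target`, and `utils.doc_to_choice` helpers referenced by the OpenBookQA configs are not part of this diff. A minimal sketch of the shape such helpers could take, assuming the standard OpenBookQA layout in which `doc["question"]` holds a `stem` plus a list of `choices` (each with `text` and `label`) and `doc["answerKey"]` names the gold label; these field names are assumptions, not taken from this commit:

# Hypothetical helpers for the OBQA-* configs above; field names assume the
# standard OpenBookQA schema and may differ from the actual AraDiCE files.
def doc_to_text(doc):
    # The prompt is the question stem, matching
    # doc_to_decontamination_query: "{{question.stem}}".
    return doc["question"]["stem"]


def doc_to_choice(doc):
    # One candidate continuation per answer option.
    return [choice["text"] for choice in doc["question"]["choices"]]


def doc_to_target(doc):
    # Index of the gold option, located via its label (e.g. "A".."D").
    labels = [choice["label"] for choice in doc["question"]["choices"]]
    return labels.index(doc["answerKey"])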
task: AraDiCE_openbookqa_eng
dataset_path: QCRI/AraDiCE-OpenBookQA
dataset_name: OBQA-eng
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: "{{question.stem}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
task: AraDiCE_openbookqa_lev
dataset_path: QCRI/AraDiCE-OpenBookQA
dataset_name: OBQA-lev
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: "{{question.stem}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0
task: AraDiCE_openbookqa_msa
dataset_path: QCRI/AraDiCE-OpenBookQA
dataset_name: OBQA-msa
training_split: null
validation_split: null
test_split: test
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: "{{question.stem}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: f1
    higher_is_better: true
    aggregation: !function metrics.micro_f1_score
metadata:
  version: 1.0