Commit 90ad5db7 authored by lintangsutawika's avatar lintangsutawika
Browse files

merged main

parents f692caa9 b177c82c
"dataset_name": "Telecommunications-and-Wireless-Technology"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_telecommunications_and_wireless_technology"
......@@ -6,32 +6,43 @@ def copa_doc_to_text(doc: dict) -> str:
connector = {"원인": " 왜냐하면", "결과": " 그래서"}[doc["question"].strip()]
return f"""{doc["premise"]} {connector}"""
def copa_doc_to_target(doc: dict) -> str:
correct_choice = doc["alternative_1"] if doc["label"] == 0 else doc["alternative_2"]
return f"""{correct_choice}"""
def copa_doc_to_choice(doc: dict) -> list:
return [f"""{doc["alternative_1"]}""", f"""{doc["alternative_2"]}"""]
def sentineg_doc_to_text(doc: dict):
return f"""문장: {doc["sentence"]} 긍부정:"""
def wic_doc_to_text(doc: dict) -> str:
return f"""문장1: {doc["context_1"]} 문장2: {doc["context_2"]} 두 문장에서 {doc["word"]}가 같은 뜻으로 쓰였나?"""
def hellaswag_process_doc(doc: Dataset) -> Dataset:
def preprocessor(dataset):
return {
"query": f"""문장: {dataset["context"]}""",
"choices": [dataset["ending_1"], dataset["ending_2"], dataset["ending_3"], dataset["ending_4"]],
"choices": [
dataset["ending_1"],
dataset["ending_2"],
dataset["ending_3"],
dataset["ending_4"],
],
"gold": int(dataset["label"]),
}
return doc.map(preprocessor)
def macro_f1_score(items):
unzipped_list = list(zip(*items))
golds = unzipped_list[0]
preds = unzipped_list[1]
fscore = f1_score(golds, preds, average='macro')
fscore = f1_score(golds, preds, average="macro")
return fscore
......@@ -10,7 +10,12 @@ def doc_to_text(doc) -> str:
Answer:
"""
choices = [doc["opa"], doc["opb"], doc["opc"], doc["opd"]]
option_choices = {'A': choices[0], 'B': choices[1], 'C': choices[2], 'D': choices[3]}
option_choices = {
"A": choices[0],
"B": choices[1],
"C": choices[2],
"D": choices[3],
}
prompt = "Question: " + doc["question"] + "\nChoices:\n"
for choice, option in option_choices.items():
......
def doc_to_text(doc) -> str:
option_choices = {'A': doc["ending0"], 'B': doc["ending1"], 'C': doc["ending2"], 'D': doc["ending3"]}
option_choices = {
"A": doc["ending0"],
"B": doc["ending1"],
"C": doc["ending2"],
"D": doc["ending3"],
}
answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items())
return f"Question: {doc['sent1']}\n{answers}Answer:"
......
......@@ -19,6 +19,12 @@ filter_list:
filter:
- function: remove_whitespace
- function: take_first
- filter:
- function: regex
group_select: -1
regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
- function: take_first
name: flexible-extract
metric_list:
- metric: exact_match
aggregation: mean
......@@ -26,4 +32,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
version: 1.0
version: 2.0
# Generated by utils.py
dataset_name: bn
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"প্রশ্ন:
"+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"প্রশ্ন: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'প্রশ্ন:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_bn
# Generated by utils.py
dataset_name: de
doc_to_target: '{% if answer is not none %}{{answer[7+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAntwort"}}{% else %}{{"Frage:
"+question+"\nAntwort"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAntwort:"}}{% else %}{{"Frage: "+question+"\nAntwort:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Frage:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_de
# Generated by utils.py
dataset_name: en
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"Question:
"+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_en
# Generated by utils.py
dataset_name: es
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"Pregunta:
"+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nRespuesta:"}}{% else %}{{"Pregunta: "+question+"\nRespuesta:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Pregunta:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_es
# Generated by utils.py
dataset_name: fr
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"Question
: "+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nRéponse :"}}{% else %}{{"Question : "+question+"\nRéponse :"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question :'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_fr
# Generated by utils.py
dataset_name: ja
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"問題: "+question+"\nAnswer"}}{%
endif %}'
doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"問題: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- '問題:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_ja
# Generated by utils.py
dataset_name: ru
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"Задача:
"+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Задача: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Задача:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_ru
# Generated by utils.py
dataset_name: sw
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"Swali:
"+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Swali: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Swali:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_sw
# Generated by utils.py
dataset_name: te
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"ప్రశ్న:
"+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"ప్రశ్న: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'ప్రశ్న:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_te
# Generated by utils.py
dataset_name: th
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"โจทย์:
"+question+"\nAnswer"}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"โจทย์: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'โจทย์:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_th
# Generated by utils.py
dataset_name: zh
doc_to_target: '{% if answer is not none %}{{answer[6+1]}}{% else %}{{answer_number|string}}{%
endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer"}}{% else %}{{"问题: "+question+"\nAnswer"}}{%
endif %}'
doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"问题: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- '问题:'
- </s>
- <|im_end|>
include: direct_yaml
task: mgsm_direct_zh
......@@ -7,7 +7,6 @@ dataset_name: null # Overridden by language-specific config.
output_type: generate_until
training_split: train
test_split: test
target_delimiter: ""
generation_kwargs:
until:
- "\n\n"
......@@ -22,10 +21,16 @@ metric_list:
ignore_case: true
ignore_punctuation: true
filter_list:
- name: "get-answer"
- name: "strict-match"
filter:
- function: "regex"
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
- filter:
- function: regex
group_select: -1
regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
- function: take_first
name: flexible-extract
metadata:
version: 1.0
version: 2.0
# Generated by utils.py
dataset_name: bn
doc_to_target: '{% if answer is not none %}{{answer[20+1]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"প্রশ্ন: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'প্রশ্ন:'
- </s>
- <|im_end|>
include: cot_yaml
task: mgsm_bn_direct
task: mgsm_en_cot_bn
# Generated by utils.py
dataset_name: de
doc_to_target: '{% if answer is not none %}{{answer[20+1]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Frage: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Frage:'
- </s>
- <|im_end|>
include: cot_yaml
task: mgsm_de_direct
task: mgsm_en_cot_de
# Generated by utils.py
dataset_name: en
doc_to_target: '{% if answer is not none %}{{answer[20+1]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: cot_yaml
task: mgsm_en_direct
task: mgsm_en_cot_en
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment