Commit 90ad5db7 authored by lintangsutawika's avatar lintangsutawika
Browse files

merged main

parents f692caa9 b177c82c
import datasets
import re import re
import datasets
def preprocess(text): def preprocess(text):
text = text.strip() text = text.strip()
......
import yaml
import datasets import datasets
import yaml
from tqdm import tqdm from tqdm import tqdm
def main() -> None: def main() -> None:
dataset_path = "alexandrainst/m_mmlu" dataset_path = "alexandrainst/m_mmlu"
# Removed hy and sk subdataset because the original dataset is broken for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
# I created this PR https://huggingface.co/datasets/alexandrainst/m_mmlu/discussions/3
# on the dataset for the authors; if it is accepted, the filter can be removed
keys_without_hy_sk = list(filter(lambda k: ('hy' not in k and 'sk' not in k),
datasets.get_dataset_infos(dataset_path).keys()))
for task in tqdm():
file_name = f"m_mmlu_{task}.yaml" file_name = f"m_mmlu_{task}.yaml"
try: try:
with open(f"{file_name}", "w") as f: with open(f"{file_name}", "w") as f:
...@@ -29,5 +22,6 @@ def main() -> None: ...@@ -29,5 +22,6 @@ def main() -> None:
except FileExistsError: except FileExistsError:
pass pass
if __name__ == "__main__": if __name__ == "__main__":
main() main()
# Generated by _generate_configs.py
dataset_name: hy
include: _default_yaml
task: m_mmlu_hy
# Generated by _generate_configs.py
dataset_name: sk
include: _default_yaml
task: m_mmlu_sk
...@@ -4,7 +4,7 @@ dataset_path: null ...@@ -4,7 +4,7 @@ dataset_path: null
dataset_name: null dataset_name: null
output_type: multiple_choice output_type: multiple_choice
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
process_docs: !function utils.process_docs process_docs: !function utils.process_docs
doc_to_text: "query" doc_to_text: "query"
......
...@@ -3,5 +3,5 @@ task: truthfulqa_ar_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_ar_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: ar dataset_name: ar
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_ar_mc2 ...@@ -3,5 +3,5 @@ task: truthfulqa_ar_mc2
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: ar dataset_name: ar
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_bn_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_bn_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: bn dataset_name: bn
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_bn_mc2 ...@@ -3,5 +3,5 @@ task: truthfulqa_bn_mc2
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: bn dataset_name: bn
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_ca_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_ca_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: ca dataset_name: ca
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_ca_mc2 ...@@ -3,5 +3,5 @@ task: truthfulqa_ca_mc2
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: ca dataset_name: ca
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_da_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_da_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: da dataset_name: da
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_da_mc2 ...@@ -3,5 +3,5 @@ task: truthfulqa_da_mc2
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: da dataset_name: da
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_de_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_de_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: de dataset_name: de
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_de_mc2 ...@@ -3,5 +3,5 @@ task: truthfulqa_de_mc2
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: de dataset_name: de
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_es_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_es_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: es dataset_name: es
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_es_mc2 ...@@ -3,5 +3,5 @@ task: truthfulqa_es_mc2
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: es dataset_name: es
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_eu_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_eu_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: eu dataset_name: eu
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_eu_mc2 ...@@ -3,5 +3,5 @@ task: truthfulqa_eu_mc2
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: eu dataset_name: eu
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
...@@ -3,5 +3,5 @@ task: truthfulqa_fr_mc1 ...@@ -3,5 +3,5 @@ task: truthfulqa_fr_mc1
dataset_path: alexandrainst/m_truthfulqa dataset_path: alexandrainst/m_truthfulqa
dataset_name: fr dataset_name: fr
training_split: null training_split: null
validation_split: validation validation_split: val
test_split: null test_split: null
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment