Unverified Commit 9b6b0f5e authored by jonabur's avatar jonabur Committed by GitHub
Browse files

add arc_challenge_mt (#1900)

* add arc_challenge_mt

* add README

* add icelandic
parent 0ae3d3eb
# arc mt
arc mt is a set of tasks for evaluating models on machine-translated versions
of the ARC Challenge benchmark, extending eval support to a number of
additional languages.
The main page for the effort is
[here](https://huggingface.co/datasets/LumiOpen/arc_challenge_mt) and we will
include more data and analysis there.
The initial datasets cover a number of European languages, and we plan to add
more in the future.
# arc_challenge_mt task for the `da` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_da'
dataset_name: 'da'
# arc_challenge_mt task for the `de` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_de'
dataset_name: 'de'
# arc_challenge_mt task for the `el` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_el'
dataset_name: 'el'
# arc_challenge_mt task for the `es` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_es'
dataset_name: 'es'
# arc_challenge_mt task for the `fi` dataset configuration.
# The sibling arc_challenge_mt_* language files `include` this file and set
# only their own task name and dataset_name, so shared settings live here.
group:
  - arc_challenge_mt
task: arc_challenge_mt_fi

# Dataset location and the configuration to load from it.
dataset_path: LumiOpen/arc_challenge_mt
dataset_name: fi
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test

# Prompt templates. Double quotes are required so that \n is a real newline.
doc_to_text: "Question: {{question}}\nAnswer:"
# Target is the position of answerKey within the choices.label list.
doc_to_target: "{{choices.label.index(answerKey)}}"
doc_to_choice: "{{choices.text}}"

should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"

# Each metric entry is one mapping; its keys must be nested under the `-` item.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true

metadata:
  version: 1.0
# arc_challenge_mt task for the `hu` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_hu'
dataset_name: 'hu'
# arc_challenge_mt task backed by the mideind/icelandic-arc-challenge dataset.
# Unlike the sibling language files, this one does not include
# arc_challenge_mt_fi.yaml: it points at a different dataset_path and sets no
# dataset_name, so the full configuration is spelled out here.
group:
  - arc_challenge_mt
task: arc_challenge_mt_is

dataset_path: mideind/icelandic-arc-challenge
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test

# Prompt templates. Double quotes are required so that \n is a real newline.
doc_to_text: "Question: {{question}}\nAnswer:"
# Target is the position of answerKey within the choices.label list.
doc_to_target: "{{choices.label.index(answerKey)}}"
doc_to_choice: "{{choices.text}}"

should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"

# Each metric entry is one mapping; its keys must be nested under the `-` item.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true

metadata:
  version: 1.0
# arc_challenge_mt task for the `it` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_it'
dataset_name: 'it'
# arc_challenge_mt task for the `nb` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_nb'
dataset_name: 'nb'
# arc_challenge_mt task for the `pl` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_pl'
dataset_name: 'pl'
# arc_challenge_mt task for the `pt` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_pt'
dataset_name: 'pt'
# arc_challenge_mt task for the `sv` dataset configuration.
# Reuses arc_challenge_mt_fi.yaml via `include`; only the task name and
# dataset_name are set in this file.
include: 'arc_challenge_mt_fi.yaml'
task: 'arc_challenge_mt_sv'
dataset_name: 'sv'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment