Commit 33f2f9bf authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into superglue

parents e1fdf2a8 7634a6ec
# Generated by utils.py
# Defines task `blimp_principle_A_domain_3` over `dataset_name: principle_A_domain_3`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: principle_A_domain_3
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_principle_A_domain_3
# Generated by utils.py
# Defines task `blimp_principle_A_reconstruction` over `dataset_name: principle_A_reconstruction`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: principle_A_reconstruction
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_principle_A_reconstruction
# Generated by utils.py
# Defines task `blimp_regular_plural_subject_verb_agreement_1` over
# `dataset_name: regular_plural_subject_verb_agreement_1`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: regular_plural_subject_verb_agreement_1
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_regular_plural_subject_verb_agreement_1
# Generated by utils.py
# Defines task `blimp_regular_plural_subject_verb_agreement_2` over
# `dataset_name: regular_plural_subject_verb_agreement_2`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: regular_plural_subject_verb_agreement_2
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_regular_plural_subject_verb_agreement_2
# Generated by utils.py
# Defines task `blimp_sentential_negation_npi_licensor_present` over
# `dataset_name: sentential_negation_npi_licensor_present`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: sentential_negation_npi_licensor_present
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_sentential_negation_npi_licensor_present
# Generated by utils.py
# Defines task `blimp_sentential_negation_npi_scope` over
# `dataset_name: sentential_negation_npi_scope`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: sentential_negation_npi_scope
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_sentential_negation_npi_scope
# Generated by utils.py
# Defines task `blimp_sentential_subject_island` over `dataset_name: sentential_subject_island`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: sentential_subject_island
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_sentential_subject_island
# Generated by utils.py
# Defines task `blimp_superlative_quantifiers_1` over `dataset_name: superlative_quantifiers_1`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: superlative_quantifiers_1
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_superlative_quantifiers_1
# Generated by utils.py
# Defines task `blimp_superlative_quantifiers_2` over `dataset_name: superlative_quantifiers_2`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: superlative_quantifiers_2
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_superlative_quantifiers_2
# Shared settings for the `blimp` task group. The per-sub-task files reference
# this template through `include: template_yaml` and add their own
# `dataset_name` and `task` values.
group: blimp
dataset_path: blimp
output_type: multiple_choice
# The Hugging Face `blimp` dataset publishes only a `train` split, so the
# evaluation documents are read from `train`; there is no `validation` split.
validation_split: train
# Empty prompt: the candidate sentences in `doc_to_choice` carry all the text.
doc_to_text: ""
# Index into `doc_to_choice`; 0 selects `sentence_good` as the correct answer.
doc_to_target: 0
doc_to_choice: "{{[sentence_good, sentence_bad]}}"
should_decontaminate: true
doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
metric_list:
  - metric: acc
# Generated by utils.py
# Defines task `blimp_tough_vs_raising_1` over `dataset_name: tough_vs_raising_1`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: tough_vs_raising_1
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_tough_vs_raising_1
# Generated by utils.py
# Defines task `blimp_tough_vs_raising_2` over `dataset_name: tough_vs_raising_2`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: tough_vs_raising_2
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_tough_vs_raising_2
# Generated by utils.py
# Defines task `blimp_transitive` over `dataset_name: transitive`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: transitive
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_transitive
# Generated by utils.py
# Defines task `blimp_wh_island` over `dataset_name: wh_island`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: wh_island
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_wh_island
# Generated by utils.py
# Defines task `blimp_wh_questions_object_gap` over `dataset_name: wh_questions_object_gap`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: wh_questions_object_gap
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_wh_questions_object_gap
# Generated by utils.py
# Defines task `blimp_wh_questions_subject_gap` over `dataset_name: wh_questions_subject_gap`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: wh_questions_subject_gap
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_wh_questions_subject_gap
# Generated by utils.py
# Defines task `blimp_wh_questions_subject_gap_long_distance` over
# `dataset_name: wh_questions_subject_gap_long_distance`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: wh_questions_subject_gap_long_distance
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_wh_questions_subject_gap_long_distance
# Generated by utils.py
# Defines task `blimp_wh_vs_that_no_gap` over `dataset_name: wh_vs_that_no_gap`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: wh_vs_that_no_gap
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_wh_vs_that_no_gap
# Generated by utils.py
# Defines task `blimp_wh_vs_that_no_gap_long_distance` over
# `dataset_name: wh_vs_that_no_gap_long_distance`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: wh_vs_that_no_gap_long_distance
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_wh_vs_that_no_gap_long_distance
# Generated by utils.py
# Defines task `blimp_wh_vs_that_with_gap` over `dataset_name: wh_vs_that_with_gap`.
# Manual edits are presumably lost when utils.py regenerates this file.
dataset_name: wh_vs_that_with_gap
# Presumably pulls in the shared settings from `template_yaml` — confirm against the loader.
include: template_yaml
task: blimp_wh_vs_that_with_gap
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment